# Dataset Exploration & Cleaning

#### Dataset: SLU Opportunity Wise Data.csv

Importing necessary libraries

In [232]:
import numpy as np
import pandas as pd

Loading Dataset

In [233]:
df = pd.read_csv("SLU Opportunity Wise Data.csv")

Dataset Understanding

In [234]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8558 entries, 0 to 8557
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Learner SignUp DateTime  8558 non-null   object
 1   Opportunity Id           8558 non-null   object
 2   Opportunity Name         8558 non-null   object
 3   Opportunity Category     8558 non-null   object
 4   Opportunity End Date     8558 non-null   object
 5   First Name               8558 non-null   object
 6   Date of Birth            8558 non-null   object
 7   Gender                   8558 non-null   object
 8   Country                  8558 non-null   object
 9   Institution Name         8553 non-null   object
 10  Current/Intended Major   8553 non-null   object
 11  Entry created at         8558 non-null   object
 12  Status Description       8558 non-null   object
 13  Status Code              8558 non-null   int64 
 14  Apply Date               8558 non-null  

In [235]:
df.head()

Unnamed: 0,Learner SignUp DateTime,Opportunity Id,Opportunity Name,Opportunity Category,Opportunity End Date,First Name,Date of Birth,Gender,Country,Institution Name,Current/Intended Major,Entry created at,Status Description,Status Code,Apply Date,Opportunity Start Date
0,06/14/2023 12:30:35,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,06/29/2024 18:52:39,Faria,01/12/2001,Female,Pakistan,Nwihs,Radiology,03/11/2024 12:01:41,Started,1080,06/14/2023 12:36:09,11/03/2022 18:30:39
1,05/01/2023 05:29:16,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,06/29/2024 18:52:39,Poojitha,08/16/2000,Female,India,SAINT LOUIS,Information Systems,03/11/2024 12:01:41,Started,1080,05/01/2023 06:08:21,11/03/2022 18:30:39
2,04/09/2023 20:35:08,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,06/29/2024 18:52:39,Emmanuel,01/27/2002,Male,United States,Illinois Institute of Technology,Computer Science,03/11/2024 12:01:41,Started,1080,05/11/2023 1085640:21:29,11/03/2022 18:30:39
3,08/29/2023 05:20:03,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,06/29/2024 18:52:39,Amrutha Varshini,11/01/1999,Female,United States,Saint Louis University,Information Systems,03/11/2024 12:01:41,Team Allocated,1070,10/09/2023 22:02:42,11/03/2022 18:30:39
4,01/06/2023 15:26:36,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,06/29/2024 18:52:39,Vinay Varshith,04/19/2000,Male,United States,Saint Louis University,Computer Science,03/11/2024 12:01:41,Started,1080,01/06/2023 15:40:10,11/03/2022 18:30:39


In [236]:
print ("Total Rows:", df.shape[0])
print ("Total Columns:", df.shape[1])

Total Rows: 8558
Total Columns: 16


Correcting datatype

In [237]:
# List of date columns
date_cols = ['Learner SignUp DateTime', 'Date of Birth', 'Opportunity End Date', 
             'Entry created at', 'Apply Date', 'Opportunity Start Date']

for col in date_cols:
    # Convert to datetime, infer format, drop time part
    df[col] = pd.to_datetime(df[col], errors='coerce').dt.normalize()  # removes hours, mins, secs
    # Convert back to datetime64[ns] to ensure uniform type
    df[col] = pd.to_datetime(df[col], format='%Y-%m-%d')

Checking Missing Values

In [238]:
df.isnull().sum()

Learner SignUp DateTime     295
Opportunity Id                0
Opportunity Name              0
Opportunity Category          0
Opportunity End Date       1262
First Name                    0
Date of Birth                 0
Gender                        0
Country                       0
Institution Name              5
Current/Intended Major        5
Entry created at              0
Status Description            0
Status Code                   0
Apply Date                  307
Opportunity Start Date     4637
dtype: int64

Filling Missing values with appropriate values

In [239]:
# Fill categorical missing values
df['Institution Name'] = df['Institution Name'].fillna('Unknown')
df['Current/Intended Major'] = df['Current/Intended Major'].fillna('Unknown')

# Fill date columns with mode
date_cols = ['Learner SignUp DateTime', 'Opportunity End Date', 
             'Apply Date', 'Opportunity Start Date']

for col in date_cols:
    mode_date = df[col].mode()[0]  # get most frequent date
    df[col] = df[col].fillna(mode_date)

# Verify no missing values remain
df.isnull().sum()


Learner SignUp DateTime    0
Opportunity Id             0
Opportunity Name           0
Opportunity Category       0
Opportunity End Date       0
First Name                 0
Date of Birth              0
Gender                     0
Country                    0
Institution Name           0
Current/Intended Major     0
Entry created at           0
Status Description         0
Status Code                0
Apply Date                 0
Opportunity Start Date     0
dtype: int64

Checking Duplicate rows if exists

In [240]:
duplicates = df.duplicated()
print("Total duplicate rows:", duplicates.sum())

df[duplicates]

Total duplicate rows: 2


Unnamed: 0,Learner SignUp DateTime,Opportunity Id,Opportunity Name,Opportunity Category,Opportunity End Date,First Name,Date of Birth,Gender,Country,Institution Name,Current/Intended Major,Entry created at,Status Description,Status Code,Apply Date,Opportunity Start Date
1370,2023-08-27,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Reddi,2003-12-01,Male,India,Aditya Engineering College,Computer Science and Engineering,2024-03-11,Team Allocated,1070,2023-08-27,2022-11-03
4882,2024-01-19,00000000-0GWQ-AXC5-X45C-2MHJ28,Data Visualization,Internship,2024-03-11,warda,2001-11-18,Female,Pakistan,National University of Sciences and Technology,Data Science,2024-03-11,Rejected,1030,2024-01-19,2022-11-03


Removing Duplicate rows

In [241]:
df = df.drop_duplicates().reset_index(drop=True)

df.duplicated().sum()

np.int64(0)

Checking for inconsistency

In [242]:
# Display unique values count for each column
for col in df.columns:
    print(f"{col}: {df[col].nunique()} unique values")
    
# Optional: display actual unique values for inspection
for col in df.columns:
    print(f"{col} unique values:\n{df[col].unique()}\n")


Learner SignUp DateTime: 410 unique values
Opportunity Id: 23 unique values
Opportunity Name: 22 unique values
Opportunity Category: 5 unique values
Opportunity End Date: 14 unique values
First Name: 3026 unique values
Date of Birth: 2620 unique values
Gender: 4 unique values
Country: 71 unique values
Institution Name: 2090 unique values
Current/Intended Major: 407 unique values
Entry created at: 1 unique values
Status Description: 8 unique values
Status Code: 8 unique values
Apply Date: 433 unique values
Opportunity Start Date: 12 unique values
Learner SignUp DateTime unique values:
<DatetimeArray>
['2023-06-14 00:00:00', '2023-05-01 00:00:00', '2023-04-09 00:00:00',
 '2023-08-29 00:00:00', '2023-01-06 00:00:00', '2024-03-02 00:00:00',
 '2023-05-31 00:00:00', '2023-07-22 00:00:00', '2023-03-20 00:00:00',
 '2023-05-11 00:00:00',
 ...
 '2024-03-06 00:00:00', '2023-11-13 00:00:00', '2023-11-03 00:00:00',
 '2023-04-11 00:00:00', '2023-02-18 00:00:00', '2023-01-26 00:00:00',
 '2023-03-02 0

Ensuring consistency

In [243]:
# Standardize Gender column
df['Gender'] = df['Gender'].str.strip()  # remove leading/trailing spaces
df['Gender'] = df['Gender'].replace({
    "Don'T Want To Specify": "Don't Want to Specify",
    "dont want to specify": "Don't Want to Specify",
    "Other": "Other",
    "Female": "Female",
    "Male": "Male"
})

In [244]:
df['Gender'].unique()

array(['Female', 'Male', "Don't want to specify", 'Other'], dtype=object)

In [245]:
# Define a mapping of inconsistent country names to standardized ones
country_corrections = {
    'Tanzania, United Republic of Tanzania': 'Tanzania',
    'Korea, Republic of South Korea': 'South Korea',
    'Iran, Islamic Republic of Persian Gulf': 'Iran',
    'Iran  Islamic Republic of Persian Gulf': 'Iran',
    'Falkland Islands (Malvinas)': 'Falkland Islands',
    'Virgin Islands, U.S.': 'U.S. Virgin Islands',
    'Libyan Arab Jamahiriya': 'Libya'
}

# Apply corrections
df['Country'] = df['Country'].replace(country_corrections)

In [246]:
df['Country'].unique()

array(['Pakistan', 'India', 'United States', 'United Arab Emirates',
       'Nigeria', 'Egypt', 'Nepal', 'Kenya', 'Ghana', 'Zambia', 'Morocco',
       'Ethiopia', 'Zimbabwe', 'Uganda', 'Indonesia', 'Cameroon', 'Yemen',
       'China', 'Bangladesh', 'Congo', 'Liberia', 'United Kingdom',
       'Vietnam', 'Japan', 'Rwanda', 'Gambia', 'Philippines', 'Australia',
       'Somalia', 'Sierra Leone', 'Lebanon', 'Botswana', 'Iraq',
       'Uzbekistan', 'Turkey', 'Honduras', 'Tanzania',
       'British Indian Ocean Territory', 'France', 'Belarus', 'Algeria',
       'South Korea', 'Mauritius', 'Tunisia', 'Kazakhstan', 'Peru',
       'Brazil', 'Ukraine', 'South Africa', 'Germany', 'Namibia', 'Iran',
       'American Samoa', 'Falkland Islands', 'Saudi Arabia', 'Sri Lanka',
       'Azerbaijan', 'Dominican Republic', 'Lesotho', 'Malaysia',
       'U.S. Virgin Islands', 'Qatar', 'Canada', 'Singapore', 'Ireland',
       'Libya', "Cote d'Ivoire", 'Afghanistan', 'Bhutan', 'Spain'],
      dtype=object)

In [247]:
# Step 1: Basic title case and stripping
df['Current/Intended Major'] = df['Current/Intended Major'].str.title().str.strip()

# Step 2: Mapping for known corrections and typos
major_corrections = {
    # Standardize degrees
    'Bsc': 'BSc',
    'Bsc Cs': 'BSc Computer Science',
    'Bsc In Computer Science': 'BSc Computer Science',
    'Bsc Nursing': 'BSc Nursing',
    'B Tech': 'B.Tech',
    'Btech': 'B.Tech',
    'Mba': 'MBA',
    'Msc': 'MS / MSc',
    'Msters In Healthcare Administration': 'Masters In Healthcare Administration',
    'Msc Cyber Forensics And Security': 'MSc Cyber Forensics And Security',
    'Msc In Information Technology': 'MSc Information Technology',
    'Masters In Computer Science': 'MSc Computer Science',
    'Masters In Information Systems': 'MSc Information Systems',
    'Pursuing Mca': 'MCA',
    'BBA HR': 'BBA In Human Resources',
    'Baf': 'Other',
    'Bsit': 'Other',
    'Ot': 'Other',
    'Te': 'Other',
    'Mmg': 'Other',
    'Bca': 'Other',
    'No': 'Other',
    'Aa': 'Other',
    # Non-academic / placeholder entries
    'Yoganand Sir': 'Other',
    'Shaista': 'Other',
    'Ha': 'Other',
    'Cycw': 'Other',
    'Zack': 'Other',
    'Pv': 'Other',
    'Otheraassss': 'Other',
    'Oth': 'Other',
    'I Am Major': 'Other',
    'Na': 'Other',
    'Ghj': 'Other',
    'Faizan': 'Other',
    'Student': 'Other',
    'Fresher': 'Other',
    'Job': 'Other',
    'Unknown': 'Other',
    'Unemployed': 'Other',
    'Already Graduation Completed': 'Other',
    'To Study': 'Other',
    'After Bachelor': 'Other',
    'Nil': 'Other',
    'Www': 'Other',
    'General': 'Other',
    'Degree': 'Other'
}

df['Current/Intended Major'] = df['Current/Intended Major'].replace(major_corrections)

# Step 3: Regex-based standardization / merging similar fields
major_mapping = {
    r'(?i)^computer science.*$': 'Computer Science',
    r'(?i)^artificial intelligence.*$': 'Artificial Intelligence',
    r'(?i)^cyber.*$': 'Cybersecurity / InfoSec',
    r'(?i)^information systems.*$': 'Information Systems',
    r'(?i)^data science.*$': 'Data Science',
    r'(?i)^supply chain.*$': 'Supply Chain Management',
    r'(?i)^mechanical.*$': 'Mechanical Engineering',
    r'(?i)^electrical.*$': 'Electrical Engineering',
    r'(?i)^business.*$': 'Business Administration',
    r'(?i)^finance.*$': 'Finance',
    r'(?i)^management.*$': 'Management',
    r'(?i)^law.*$': 'Law',
    r'(?i)^health.*management.*$': 'Healthcare Management',
    r'(?i)^msc.*$': 'MS / MSc',
    r'(?i)^mba.*$': 'MBA',
    r'(?i)^b\.?tech.*$': 'B.Tech',
    r'(?i)^bsc.*$': 'BSc'
}

def clean_major(major):
    if pd.isnull(major) or major.strip() == '':
        return 'Other'
    major = major.strip()
    for pattern, replacement in major_mapping.items():
        if pd.Series([major]).str.match(pattern, case=False).any():
            return replacement
    return major

df['Current/Intended Major'] = df['Current/Intended Major'].apply(clean_major)

# Step 4: Map any remaining single-word unclear entries to 'Other'
unclear_entries = ['Baf', 'Bsit', 'Ot', 'Te', 'Mmg', 'Bca', 'Pos Service']
df['Current/Intended Major'] = df['Current/Intended Major'].replace(unclear_entries, 'Other')

# Step 5: Optional – final title case standardization
df['Current/Intended Major'] = df['Current/Intended Major'].str.title()

# Step 6: Print unique values
print(df['Current/Intended Major'].unique())


['Radiology' 'Information Systems' 'Computer Science'
 'Mechanical Engineering' 'Artificial Intelligence'
 'Robotics And Automation Engineering' 'Data Visualization'
 'Business Administration' 'Public Health' 'Architecture' 'Biology'
 'Economics' 'Other' 'Mathematics' 'Bioinformatics'
 'Biomedical Engineering' 'Electrical Engineering'
 'Accounting And Finance' 'Secretarial' 'Data Science' 'Statistics'
 'Electronics And Communication' 'Computer Information Systems'
 'Management' 'Project Management' 'Medicine' 'Information'
 'Information Technology' 'Actuarial Mathematics' 'Software Engineering'
 'Biological Sciences' 'Urban And Housing Development' 'Human Resources'
 'Cybersecurity / Infosec' 'Data Analytics' 'Computer Engineering'
 'Environmental Sciences' 'Philosophy' 'Law' 'Industrial Engineering'
 'Theology Or Divinity And Religious Studies' 'Agriculture And Forestry'
 'International Business Management' 'Politics' 'Marketing'
 'Pure And Applied Physics' 'Biochemistry'
 'Informatio

In [248]:
# Final cleanup mapping
final_corrections = {
    'Eee': 'Electrical And Electronic Engineering',
    'Pol': 'Political Science',
    'Bba Hr': 'BBA In Human Resources',
    'Ise': 'Information Systems Engineering',
    'It': 'Information Technology',
    'Othe': 'Other',
    'Sdada': 'Other',
    'Non': 'Other',
    'Data Sceince': 'Data Science'
}

df['Current/Intended Major'] = df['Current/Intended Major'].replace(final_corrections)

# Map any remaining non-standard / one-word unclear entries to 'Other'
unclear_entries = ['Baf', 'Bsit', 'Ot', 'Te', 'Mmg', 'Bca', 'Pos Service', 'Na', 'Faizan', 'Ghj']
df['Current/Intended Major'] = df['Current/Intended Major'].replace(unclear_entries, 'Other')

In [250]:
final_mapping = {
    'Not Know Ab': 'Other',
    'Dropped Out': 'Other',
    'Could Computing': 'Cloud Computing',
    'Statistics And Computeri Science': 'Statistics And Computer Science',
    'Computer And Infromation Sciences': 'Computer And Information Sciences',
    'Human Resource': 'Human Resources',
    'B Sc': 'BSc',
    'Bsc': 'BSc',
    'Ms In Supply Chain Management': 'MS / MSc',
    'Ms Biostatistics And Health Analytics': 'MS / MSc',
    'Ms In Cybersecurity': 'MS / MSc',
    'Ms In Analytics': 'MS / MSc'
}

df['Current/Intended Major'] = df['Current/Intended Major'].replace(final_mapping)

In [251]:
df['Current/Intended Major'].unique()

array(['Radiology', 'Information Systems', 'Computer Science',
       'Mechanical Engineering', 'Artificial Intelligence',
       'Robotics And Automation Engineering', 'Data Visualization',
       'Business Administration', 'Public Health', 'Architecture',
       'Biology', 'Economics', 'Other', 'Mathematics', 'Bioinformatics',
       'Biomedical Engineering', 'Electrical Engineering',
       'Accounting And Finance', 'Secretarial', 'Data Science',
       'Statistics', 'Electronics And Communication',
       'Computer Information Systems', 'Management', 'Project Management',
       'Medicine', 'Information', 'Information Technology',
       'Actuarial Mathematics', 'Software Engineering',
       'Biological Sciences', 'Urban And Housing Development',
       'Human Resources', 'Cybersecurity / Infosec', 'Data Analytics',
       'Computer Engineering', 'Environmental Sciences', 'Philosophy',
       'Law', 'Industrial Engineering',
       'Theology Or Divinity And Religious Studies',
   

In [252]:
# Define mapping: incorrect → correct institution names
institution_mapping = {
    'Nwihs': 'Northwest Institute of Health Sciences',
    'SAINT LOUIS': 'Saint Louis University',
    'Jawaharlal Nehru technological University of Hyderabad': 'Jawaharlal Nehru Technological University, Hyderabad',
    'Kwara state University': 'Kwara State University',
    'Tai Solarin university of Education': 'Tai Solarin University of Education'
}

# Apply mapping to the DataFrame column
df['Institution Name'] = df['Institution Name'].replace(institution_mapping)

# Optional: standardize capitalization (title case)
df['Institution Name'] = df['Institution Name'].str.title()

In [253]:
df['Institution Name'].unique()

array(['Northwest Institute Of Health Sciences', 'Saint Louis University',
       'Illinois Institute Of Technology', ...,
       'Metea Valley High School',
       'Dhanalakshmi Srinivasan Engineering College Perambalur',
       'Jawaharlal Nehru Technological University, Hyderabad'],
      shape=(1819,), dtype=object)

Final Checking of dataset

In [255]:
df.isnull().sum()

Learner SignUp DateTime    0
Opportunity Id             0
Opportunity Name           0
Opportunity Category       0
Opportunity End Date       0
First Name                 0
Date of Birth              0
Gender                     0
Country                    0
Institution Name           0
Current/Intended Major     0
Entry created at           0
Status Description         0
Status Code                0
Apply Date                 0
Opportunity Start Date     0
dtype: int64

In [256]:
duplicates = df.duplicated()
print("Total duplicate rows:", duplicates.sum())

Total duplicate rows: 4


In [257]:
# Remove exact duplicate rows
df = df.drop_duplicates()

In [258]:
df.duplicated().sum()

np.int64(0)

In [261]:
print ("Total Rows:", df.shape[0])
print ("Total Columns:", df.shape[1])

Total Rows: 8552
Total Columns: 16


In [266]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8552 entries, 0 to 8555
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Learner SignUp DateTime  8552 non-null   datetime64[ns]
 1   Opportunity Id           8552 non-null   object        
 2   Opportunity Name         8552 non-null   object        
 3   Opportunity Category     8552 non-null   object        
 4   Opportunity End Date     8552 non-null   datetime64[ns]
 5   First Name               8552 non-null   object        
 6   Date of Birth            8552 non-null   datetime64[ns]
 7   Gender                   8552 non-null   object        
 8   Country                  8552 non-null   object        
 9   Institution Name         8552 non-null   object        
 10  Current/Intended Major   8552 non-null   object        
 11  Entry created at         8552 non-null   datetime64[ns]
 12  Status Description       8552 non-null 

Final dataset overview

In [265]:
df.head(10)

Unnamed: 0,Learner SignUp DateTime,Opportunity Id,Opportunity Name,Opportunity Category,Opportunity End Date,First Name,Date of Birth,Gender,Country,Institution Name,Current/Intended Major,Entry created at,Status Description,Status Code,Apply Date,Opportunity Start Date
0,2023-06-14,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Faria,2001-01-12,Female,Pakistan,Northwest Institute Of Health Sciences,Radiology,2024-03-11,Started,1080,2023-06-14,2022-11-03
1,2023-05-01,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Poojitha,2000-08-16,Female,India,Saint Louis University,Information Systems,2024-03-11,Started,1080,2023-05-01,2022-11-03
2,2023-04-09,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Emmanuel,2002-01-27,Male,United States,Illinois Institute Of Technology,Computer Science,2024-03-11,Started,1080,2024-01-05,2022-11-03
3,2023-08-29,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Amrutha Varshini,1999-11-01,Female,United States,Saint Louis University,Information Systems,2024-03-11,Team Allocated,1070,2023-10-09,2022-11-03
4,2023-01-06,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Vinay Varshith,2000-04-19,Male,United States,Saint Louis University,Computer Science,2024-03-11,Started,1080,2023-01-06,2022-11-03
5,2024-03-02,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Mor,1996-05-12,Male,India,Saint Louis University,Mechanical Engineering,2024-03-11,Waitlisted,1040,2024-03-02,2022-11-03
6,2023-05-31,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Fardeen,2001-09-09,Male,India,Illinois Institute Of Technology,Computer Science,2024-03-11,Withdraw,1110,2023-06-14,2022-11-03
7,2023-07-22,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Gauri,2006-02-27,Female,United Arab Emirates,Gems New Millennium School Al Khail,Artificial Intelligence,2024-03-11,Team Allocated,1070,2023-07-22,2022-11-03
8,2023-03-20,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,SIDDHARTH,2005-12-22,Male,India,Ashoka Academy,Robotics And Automation Engineering,2024-03-11,Started,1080,2023-05-24,2022-11-03
9,2023-05-11,00000000-0GN2-A0AY-7XK8-C5FZPP,Career Essentials: Getting Started with Your P...,Course,2024-06-29,Vanshika,1998-06-26,Female,India,Illinois Institute Of Technology,Computer Science,2024-03-11,Team Allocated,1070,2023-12-17,2022-11-03


Exporting Cleaned Dataset

In [231]:
df.to_csv("Cleaned_SLU_Opportunity_Wise_Data.csv", index=False)