In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
# Show every column when a DataFrame is rendered instead of truncating wide frames.
pd.options.display.max_columns = None

In [3]:
import sys
from pathlib import Path

# Make the sibling `src` directory importable. The location is derived from the
# current working directory, so this assumes the notebook is run from its own folder.
# The membership check keeps sys.path free of duplicate entries when the cell is re-run.
src_dir = str(Path().resolve().parent / 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import the data-cleaning helpers from the data_cleaning module
from data_cleaning import drop_columns, impute_missing_values

In [4]:
# Load the training data; each row already carries a fold assignment
# (the `kfold` column visible in the preview).
train_folds_csv = "../input/train_folds.csv"
data = pd.read_csv(train_folds_csv)
data.head()

Unnamed: 0,Prospect ID,Lead Number,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Country,Specialization,How did you hear about X Education,What is your current occupation,What matters most to you in choosing a course,Search,Magazine,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,Receive More Updates About Our Courses,Tags,Lead Quality,Update me on Supply Chain Content,Get updates on DM Content,Lead Profile,City,Asymmetrique Activity Index,Asymmetrique Profile Index,Asymmetrique Activity Score,Asymmetrique Profile Score,I agree to pay the amount through cheque,A free copy of Mastering The Interview,Last Notable Activity,kfold
0,8b94b8c4-7107-4c12-8a66-531d0ed974c4,615582,Landing Page Submission,Google,No,No,1,8.0,252,2.67,Email Opened,India,Finance Management,Select,Unemployed,Better Career Prospects,No,No,No,No,No,No,No,No,Closed by Horizzon,,No,No,Select,Mumbai,,,,,No,Yes,Email Opened,4
1,a4c0a4e4-c5d8-4e48-8a4f-030611dde534,588939,Landing Page Submission,Direct Traffic,Yes,No,0,2.0,929,2.0,Email Bounced,India,Marketing Management,,,,No,No,No,No,No,No,No,No,,,No,No,,Other Cities,02.Medium,02.Medium,14.0,16.0,No,Yes,Modified,1
2,889c23c8-84d0-4936-97ad-7b438dc2e2d7,621242,Landing Page Submission,Direct Traffic,Yes,No,0,1.0,2,1.0,Email Bounced,India,,,Student,,No,No,No,No,No,No,No,No,,,No,No,,Other Cities,,,,,No,No,Modified,3
3,85dfdbea-fb6c-4428-a59c-59155bedea4a,589803,Landing Page Submission,Direct Traffic,No,No,0,2.0,323,2.0,Email Opened,India,Select,Select,Working Professional,Better Career Prospects,No,No,No,No,No,No,No,No,Ringing,Not Sure,No,No,Select,Mumbai,02.Medium,01.High,14.0,18.0,No,No,Email Opened,4
4,cbc292d8-752f-47ea-92c6-07d4247638d5,651441,Landing Page Submission,Google,No,No,0,3.0,201,3.0,Email Opened,India,Marketing Management,Online Search,,,No,No,No,No,No,No,No,No,,,No,No,,Mumbai,02.Medium,01.High,13.0,18.0,No,No,Email Opened,1


In [5]:
# drop_columns comes from the project's data_cleaning module; its selection rules
# are not visible here. Comparing the two previews, columns such as Prospect ID,
# Lead Number, Country and Tags are no longer present afterwards.
df = drop_columns(data)
df.head()

Unnamed: 0,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,Last Notable Activity,kfold
0,Landing Page Submission,Google,No,No,1,8.0,252,2.67,Email Opened,No,No,No,No,No,No,Yes,Email Opened,4
1,Landing Page Submission,Direct Traffic,Yes,No,0,2.0,929,2.0,Email Bounced,No,No,No,No,No,No,Yes,Modified,1
2,Landing Page Submission,Direct Traffic,Yes,No,0,1.0,2,1.0,Email Bounced,No,No,No,No,No,No,No,Modified,3
3,Landing Page Submission,Direct Traffic,No,No,0,2.0,323,2.0,Email Opened,No,No,No,No,No,No,No,Email Opened,4
4,Landing Page Submission,Google,No,No,0,3.0,201,3.0,Email Opened,No,No,No,No,No,No,No,Email Opened,1


In [6]:
# Sanity check: run the imputation once per fold (0-4) and print the resulting shape.
for fold_id in range(5):
    print(impute_missing_values(df, fold_id).shape)

(9240, 18)
(9240, 18)
(9240, 18)
(9240, 18)
(9240, 18)


In [7]:
# Build the working frame with fold 0 passed to the imputer (the same fold that is
# held out as the test split further down). How impute_missing_values uses the
# fold number is defined in the project's data_cleaning module — TODO confirm there.
first_fold = impute_missing_values(df, 0)
first_fold.head()

Unnamed: 0,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,Last Notable Activity,kfold
0,Landing Page Submission,Google,No,No,1,8.0,252,2.67,Email Opened,No,No,No,No,No,No,Yes,Email Opened,4
1,Landing Page Submission,Direct Traffic,Yes,No,0,2.0,929,2.0,Email Bounced,No,No,No,No,No,No,Yes,Modified,1
2,Landing Page Submission,Direct Traffic,Yes,No,0,1.0,2,1.0,Email Bounced,No,No,No,No,No,No,No,Modified,3
3,Landing Page Submission,Direct Traffic,No,No,0,2.0,323,2.0,Email Opened,No,No,No,No,No,No,No,Email Opened,4
4,Landing Page Submission,Google,No,No,0,3.0,201,3.0,Email Opened,No,No,No,No,No,No,No,Email Opened,1


In [8]:
# Encoding applied to the Yes/No flag columns.
mapping = {
    'Yes': 1,
    'No': 0
}

# Candidate flag columns are detected on `df` (the frame before imputation).
# The dtypes listing that follows shows `first_fold` holding columns such as
# `Converted` as object, which is presumably why detection is not done on it.
# Every value must also be a key of `mapping`; any other two-valued text column
# would otherwise be silently turned into NaN by `.map`.
binary_columns = [
    col for col in df.columns
    if df[col].dtype == 'O'
    and df[col].nunique() == 2
    and df[col].dropna().isin(list(mapping)).all()
]

# Only encode columns that still hold the original labels, so that re-running
# this cell does not map the already-encoded 0/1 values to NaN.
columns_to_encode = [
    col for col in binary_columns
    if first_fold[col].dropna().isin(list(mapping)).all()
]
if columns_to_encode:
    first_fold[columns_to_encode] = first_fold[columns_to_encode].apply(lambda x: x.map(mapping))

first_fold.head()

Unnamed: 0,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,Last Notable Activity,kfold
0,Landing Page Submission,Google,0,0,1,8.0,252,2.67,Email Opened,0,0,0,0,0,0,1,Email Opened,4
1,Landing Page Submission,Direct Traffic,1,0,0,2.0,929,2.0,Email Bounced,0,0,0,0,0,0,1,Modified,1
2,Landing Page Submission,Direct Traffic,1,0,0,1.0,2,1.0,Email Bounced,0,0,0,0,0,0,0,Modified,3
3,Landing Page Submission,Direct Traffic,0,0,0,2.0,323,2.0,Email Opened,0,0,0,0,0,0,0,Email Opened,4
4,Landing Page Submission,Google,0,0,0,3.0,201,3.0,Email Opened,0,0,0,0,0,0,0,Email Opened,1


In [9]:
# Inspect the column dtypes; per the listing, only the encoded Yes/No flag
# columns are int64 at this point and every other column is still object.
first_fold.dtypes

Lead Origin                               object
Lead Source                               object
Do Not Email                               int64
Do Not Call                                int64
Converted                                 object
TotalVisits                               object
Total Time Spent on Website               object
Page Views Per Visit                      object
Last Activity                             object
Search                                     int64
Newspaper Article                          int64
X Education Forums                         int64
Newspaper                                  int64
Digital Advertisement                      int64
Through Recommendations                    int64
A free copy of Mastering The Interview     int64
Last Notable Activity                     object
kfold                                     object
dtype: object

In [10]:
# Numeric feature columns (these are the ones scaled later on).
numerical_columns = ['TotalVisits', 'Total Time Spent on Website', 'Page Views Per Visit']

# The target and the fold id are stored as object too, so they are converted
# together with the features but kept out of `numerical_columns`.
# errors='coerce' turns any value that cannot be parsed into NaN.
columns_to_convert = ['Converted'] + numerical_columns + ['kfold']
first_fold[columns_to_convert] = first_fold[columns_to_convert].apply(pd.to_numeric, errors='coerce')

# Verify the conversion
print(first_fold[columns_to_convert].dtypes)

numerical_columns

Converted                        int64
TotalVisits                    float64
Total Time Spent on Website      int64
Page Views Per Visit           float64
kfold                            int64
dtype: object


['TotalVisits', 'Total Time Spent on Website', 'Page Views Per Visit']

In [11]:
# Fold 0 is held out as the test split; all remaining folds form the training split.
holdout_mask = first_fold['kfold'] == 0
train_data = first_fold.loc[~holdout_mask].reset_index(drop=True)
test_data = first_fold.loc[holdout_mask].reset_index(drop=True)

In [12]:
# Standardise the numeric features. The scaler is fitted on the training split
# only, and the fitted scaler is then applied to both splits.
scaler = StandardScaler()
scaler.fit(train_data[numerical_columns])
for split in (train_data, test_data):
    split[numerical_columns] = scaler.transform(split[numerical_columns])

train_data.head()

Unnamed: 0,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,Last Notable Activity,kfold
0,Landing Page Submission,Google,0,0,1,0.89772,-0.430145,0.14624,Email Opened,0,0,0,0,0,0,1,Email Opened,4
1,Landing Page Submission,Direct Traffic,1,0,0,-0.277165,0.805946,-0.15873,Email Bounced,0,0,0,0,0,0,1,Modified,1
2,Landing Page Submission,Direct Traffic,1,0,0,-0.472979,-0.886604,-0.613909,Email Bounced,0,0,0,0,0,0,0,Modified,3
3,Landing Page Submission,Direct Traffic,0,0,0,-0.277165,-0.300511,-0.15873,Email Opened,0,0,0,0,0,0,0,Email Opened,4
4,Landing Page Submission,Google,0,0,0,-0.081351,-0.523263,0.296449,Email Opened,0,0,0,0,0,0,0,Email Opened,1


In [13]:
# Columns still stored with object dtype are treated as the categorical features.
categorical_columns = [name for name, dtype in first_fold.dtypes.items() if dtype == 'O']
categorical_columns

['Lead Origin', 'Lead Source', 'Last Activity', 'Last Notable Activity']

In [14]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical columns. The encoder is fitted on the training
# split only; drop='first' omits the first category of every column.
# NOTE(review): with handle_unknown='ignore', a category not seen during fit is
# encoded as all zeros, i.e. the same pattern as the dropped first category —
# confirm this is acceptable for the held-out fold.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore', drop='first')
train_encoded_values = encoder.fit_transform(train_data[categorical_columns])
test_encoded_values = encoder.transform(test_data[categorical_columns])
encoded_feature_names = encoder.get_feature_names_out(categorical_columns)

# Wrap the encoded arrays in frames that carry the generated column labels and
# the row index of the split they belong to.
encoded_cols_train = pd.DataFrame(
    train_encoded_values,
    columns=encoded_feature_names,
    index=train_data.index,
)
encoded_cols_test = pd.DataFrame(
    test_encoded_values,
    columns=encoded_feature_names,
    index=test_data.index,
)

# Swap the raw categorical columns for their encoded counterparts.
train_data = pd.concat(
    [train_data.drop(columns=categorical_columns), encoded_cols_train],
    axis=1,
)
test_data = pd.concat(
    [test_data.drop(columns=categorical_columns), encoded_cols_test],
    axis=1,
)

# Verify the changes
train_data.head()



Unnamed: 0,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,kfold,Lead Origin_Landing Page Submission,Lead Origin_Lead Add Form,Lead Origin_Lead Import,Lead Origin_Quick Add Form,Lead Source_Direct Traffic,Lead Source_Facebook,Lead Source_Google,Lead Source_Live Chat,Lead Source_NC_EDM,Lead Source_Olark Chat,Lead Source_Organic Search,Lead Source_Press_Release,Lead Source_Reference,Lead Source_Referral Sites,Lead Source_Social Media,Lead Source_Welingak Website,Lead Source_bing,Lead Source_blog,Lead Source_google,Lead Source_testone,Lead Source_welearnblog_Home,Last Activity_Converted to Lead,Last Activity_Email Bounced,Last Activity_Email Link Clicked,Last Activity_Email Marked Spam,Last Activity_Email Opened,Last Activity_Email Received,Last Activity_Form Submitted on Website,Last Activity_Had a Phone Conversation,Last Activity_Olark Chat Conversation,Last Activity_Page Visited on Website,Last Activity_Resubscribed to emails,Last Activity_SMS Sent,Last Activity_Unreachable,Last Activity_Unsubscribed,Last Activity_View in browser link Clicked,Last Activity_Visited Booth in Tradeshow,Last Notable Activity_Email Link Clicked,Last Notable Activity_Email Marked Spam,Last Notable Activity_Email Opened,Last Notable Activity_Email Received,Last Notable Activity_Form Submitted on Website,Last Notable Activity_Had a Phone Conversation,Last Notable Activity_Modified,Last Notable Activity_Olark Chat Conversation,Last Notable Activity_Page Visited on Website,Last Notable Activity_Resubscribed to emails,Last Notable Activity_SMS Sent,Last Notable Activity_Unreachable,Last Notable Activity_Unsubscribed
0,0,0,1,0.89772,-0.430145,0.14624,0,0,0,0,0,0,1,4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,0,-0.277165,0.805946,-0.15873,0,0,0,0,0,0,1,1,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0,0,-0.472979,-0.886604,-0.613909,0,0,0,0,0,0,0,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,0,-0.277165,-0.300511,-0.15873,0,0,0,0,0,0,0,4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,-0.081351,-0.523263,0.296449,0,0,0,0,0,0,0,1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Preview the held-out split after it went through the same scaling and encoding steps.
test_data.head()

Unnamed: 0,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,kfold,Lead Origin_Landing Page Submission,Lead Origin_Lead Add Form,Lead Origin_Lead Import,Lead Origin_Quick Add Form,Lead Source_Direct Traffic,Lead Source_Facebook,Lead Source_Google,Lead Source_Live Chat,Lead Source_NC_EDM,Lead Source_Olark Chat,Lead Source_Organic Search,Lead Source_Press_Release,Lead Source_Reference,Lead Source_Referral Sites,Lead Source_Social Media,Lead Source_Welingak Website,Lead Source_bing,Lead Source_blog,Lead Source_google,Lead Source_testone,Lead Source_welearnblog_Home,Last Activity_Converted to Lead,Last Activity_Email Bounced,Last Activity_Email Link Clicked,Last Activity_Email Marked Spam,Last Activity_Email Opened,Last Activity_Email Received,Last Activity_Form Submitted on Website,Last Activity_Had a Phone Conversation,Last Activity_Olark Chat Conversation,Last Activity_Page Visited on Website,Last Activity_Resubscribed to emails,Last Activity_SMS Sent,Last Activity_Unreachable,Last Activity_Unsubscribed,Last Activity_View in browser link Clicked,Last Activity_Visited Booth in Tradeshow,Last Notable Activity_Email Link Clicked,Last Notable Activity_Email Marked Spam,Last Notable Activity_Email Opened,Last Notable Activity_Email Received,Last Notable Activity_Form Submitted on Website,Last Notable Activity_Had a Phone Conversation,Last Notable Activity_Modified,Last Notable Activity_Olark Chat Conversation,Last Notable Activity_Page Visited on Website,Last Notable Activity_Resubscribed to emails,Last Notable Activity_SMS Sent,Last Notable Activity_Unreachable,Last Notable Activity_Unsubscribed
0,0,0,0,-0.081351,-0.181832,-0.38632,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0,0,1,0.310278,1.914229,1.206807,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0,0,-0.277165,-0.846436,-0.15873,0,0,0,0,0,0,1,0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0,0,1,-0.277165,1.534455,-0.15873,0,0,0,0,0,0,1,0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0,0,1,0.506092,1.061563,1.661987,0,0,0,0,0,0,1,0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Stack the processed training and test splits back into a single frame.
# Both splits were re-indexed from 0, so without ignore_index=True the result
# would carry duplicate row labels (a label lookup such as final_df.loc[0]
# would return two rows).
final_df = pd.concat([train_data, test_data], axis=0, ignore_index=True)
final_df.head()

Unnamed: 0,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,kfold,Lead Origin_Landing Page Submission,Lead Origin_Lead Add Form,Lead Origin_Lead Import,Lead Origin_Quick Add Form,Lead Source_Direct Traffic,Lead Source_Facebook,Lead Source_Google,Lead Source_Live Chat,Lead Source_NC_EDM,Lead Source_Olark Chat,Lead Source_Organic Search,Lead Source_Press_Release,Lead Source_Reference,Lead Source_Referral Sites,Lead Source_Social Media,Lead Source_Welingak Website,Lead Source_bing,Lead Source_blog,Lead Source_google,Lead Source_testone,Lead Source_welearnblog_Home,Last Activity_Converted to Lead,Last Activity_Email Bounced,Last Activity_Email Link Clicked,Last Activity_Email Marked Spam,Last Activity_Email Opened,Last Activity_Email Received,Last Activity_Form Submitted on Website,Last Activity_Had a Phone Conversation,Last Activity_Olark Chat Conversation,Last Activity_Page Visited on Website,Last Activity_Resubscribed to emails,Last Activity_SMS Sent,Last Activity_Unreachable,Last Activity_Unsubscribed,Last Activity_View in browser link Clicked,Last Activity_Visited Booth in Tradeshow,Last Notable Activity_Email Link Clicked,Last Notable Activity_Email Marked Spam,Last Notable Activity_Email Opened,Last Notable Activity_Email Received,Last Notable Activity_Form Submitted on Website,Last Notable Activity_Had a Phone Conversation,Last Notable Activity_Modified,Last Notable Activity_Olark Chat Conversation,Last Notable Activity_Page Visited on Website,Last Notable Activity_Resubscribed to emails,Last Notable Activity_SMS Sent,Last Notable Activity_Unreachable,Last Notable Activity_Unsubscribed
0,0,0,1,0.89772,-0.430145,0.14624,0,0,0,0,0,0,1,4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,0,0,-0.277165,0.805946,-0.15873,0,0,0,0,0,0,1,1,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0,0,-0.472979,-0.886604,-0.613909,0,0,0,0,0,0,0,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,0,-0.277165,-0.300511,-0.15873,0,0,0,0,0,0,0,4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,-0.081351,-0.523263,0.296449,0,0,0,0,0,0,0,1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Row and column count of the combined frame.
final_df.shape

(9240, 64)

In [18]:
# Total number of missing cells across the whole combined frame.
final_df.isna().to_numpy().sum()

np.int64(0)