In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from io import BytesIO

In [None]:
# URL of the raw Excel workbook hosted on GitHub
url = 'https://raw.githubusercontent.com/norahsanad/Excel/main/Customer%20Call%20List.xlsx'

# Download the workbook; the timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)
# Stop here with a clear HTTP error instead of handing an error page to the Excel parser
response.raise_for_status()
file_data = BytesIO(response.content)

# Parse the downloaded bytes into a DataFrame
df = pd.read_excel(file_data, engine='openpyxl')

# Display the first few rows of the DataFrame
df.head()


Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact,Not_Useful_Column
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Yes,No,True
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,No,Yes,False
2,1003,Walter,White,,298 Drugs Driveway,N,,True
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Yes,Y,True
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,No,True


# **NOTE:**

Upon reviewing the dataset, I identified two duplicate rows. Additionally, the 'Last_Name' column contains extra characters such as '_', '...', and '/'. There are also missing values in the 'Phone_Number' and 'Do_Not_Contact' columns. The 'Paying Customer' and 'Do_Not_Contact' columns contain inconsistent values (a mix of 'Yes'/'No' and 'Y'/'N') that need to be standardized. Finally, the 'Not_Useful_Column' column is unnecessary and can be removed.


# Cleaning:

In [None]:
# Discard rows that are exact repeats of an earlier row, keeping the first occurrence
df = df.drop_duplicates(keep='first')
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact,Not_Useful_Column
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Yes,No,True
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,No,Yes,False
2,1003,Walter,White,,298 Drugs Driveway,N,,True
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Yes,Y,True
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,No,True
5,1006,Ron,Swanson,3689753000.0,768 City Parkway,Yes,Yes,True
6,1007,Jeff,Winger,,1209 South Street,No,No,False
7,1008,Sherlock,Holmes,8766783000.0,98 Clue Drive,N,No,False
8,1009,Gandalf,,,123 Middle Earth,Yes,,False
9,1010,Peter,Parker,1235455000.0,"25th Main Street, New York",Yes,No,True


In [None]:
# Remove the column 'Not_Useful_Column' from the DataFrame 'df'.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns='Not_Useful_Column', errors='ignore')

# Display the DataFrame
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Yes,No
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,No,Yes
2,1003,Walter,White,,298 Drugs Driveway,N,
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Yes,Y
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,No
5,1006,Ron,Swanson,3689753000.0,768 City Parkway,Yes,Yes
6,1007,Jeff,Winger,,1209 South Street,No,No
7,1008,Sherlock,Holmes,8766783000.0,98 Clue Drive,N,No
8,1009,Gandalf,,,123 Middle Earth,Yes,
9,1010,Peter,Parker,1235455000.0,"25th Main Street, New York",Yes,No


In [None]:
# Tidy 'Last_Name' in one chained pass:
#   1. trim surrounding whitespace
#   2. remove leading '/' characters
#   3. remove leading '.' characters (lstrip takes a set of characters,
#      so '.' removes the same text as '...')
#   4. remove trailing '_' characters
df['Last_Name'] = (
    df['Last_Name']
    .str.strip()
    .str.lstrip('/')
    .str.lstrip('.')
    .str.rstrip('_')
)

# Show the DataFrame
df


Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Yes,No
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,No,Yes
2,1003,Walter,White,,298 Drugs Driveway,N,
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Yes,Y
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,No
5,1006,Ron,Swanson,3689753000.0,768 City Parkway,Yes,Yes
6,1007,Jeff,Winger,,1209 South Street,No,No
7,1008,Sherlock,Holmes,8766783000.0,98 Clue Drive,N,No
8,1009,Gandalf,,,123 Middle Earth,Yes,
9,1010,Peter,Parker,1235455000.0,"25th Main Street, New York",Yes,No


In [None]:
# Standardise 'Paying Customer' to the single-letter codes 'Y' / 'N'.
# Series.replace with a dict swaps whole cell values only ('Yes' -> 'Y',
# 'No' -> 'N'); the previous .str.replace calls substituted substrings and
# would have rewritten any longer value that merely contains 'Yes' or 'No'.
# Missing values are left as they are.
df['Paying Customer'] = df['Paying Customer'].replace({'Yes': 'Y', 'No': 'N'})

# Display the DataFrame
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Y,No
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,N,Yes
2,1003,Walter,White,,298 Drugs Driveway,N,
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Y,Y
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,No
5,1006,Ron,Swanson,3689753000.0,768 City Parkway,Y,Yes
6,1007,Jeff,Winger,,1209 South Street,N,No
7,1008,Sherlock,Holmes,8766783000.0,98 Clue Drive,N,No
8,1009,Gandalf,,,123 Middle Earth,Y,
9,1010,Peter,Parker,1235455000.0,"25th Main Street, New York",Y,No


In [None]:
# Standardise 'Do_Not_Contact' to the single-letter codes 'Y' / 'N'.
# Series.replace with a dict swaps whole cell values only ('Yes' -> 'Y',
# 'No' -> 'N'); the previous .str.replace calls substituted substrings and
# would have rewritten any longer value that merely contains 'Yes' or 'No'.
# Missing values are left as they are.
df['Do_Not_Contact'] = df['Do_Not_Contact'].replace({'Yes': 'Y', 'No': 'N'})

# Display the updated DataFrame
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234568000.0,"123 Shire Lane, Shire",Y,N
1,1002,Abed,Nadir,9876543000.0,93 West Main Street,N,Y
2,1003,Walter,White,,298 Drugs Driveway,N,
3,1004,Dwight,Schrute,1235432000.0,"980 Paper Avenue, Pennsylvania, 18503",Y,Y
4,1005,Jon,Snow,8766783000.0,123 Dragons Road,Y,N
5,1006,Ron,Swanson,3689753000.0,768 City Parkway,Y,Y
6,1007,Jeff,Winger,,1209 South Street,N,N
7,1008,Sherlock,Holmes,8766783000.0,98 Clue Drive,N,N
8,1009,Gandalf,,,123 Middle Earth,Y,
9,1010,Peter,Parker,1235455000.0,"25th Main Street, New York",Y,N


In [None]:
# Swap every missing value (NaN) in 'df' for an empty string ('')
df = df.fillna(value='')

# Show the resulting DataFrame
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234567890.0,"123 Shire Lane, Shire",Y,N
1,1002,Abed,Nadir,9876543213.0,93 West Main Street,N,Y
2,1003,Walter,White,,298 Drugs Driveway,N,
3,1004,Dwight,Schrute,1235432345.0,"980 Paper Avenue, Pennsylvania, 18503",Y,Y
4,1005,Jon,Snow,8766783469.0,123 Dragons Road,Y,N
5,1006,Ron,Swanson,3689753478.0,768 City Parkway,Y,Y
6,1007,Jeff,Winger,,1209 South Street,N,N
7,1008,Sherlock,Holmes,8766783469.0,98 Clue Drive,N,N
8,1009,Gandalf,,,123 Middle Earth,Y,
9,1010,Peter,Parker,1235455421.0,"25th Main Street, New York",Y,N


In [None]:
# Keep only the customers who have not opted out of contact.
# A boolean mask filters all rows in one vectorised step, replacing the
# row-by-row loop that dropped from 'df' while iterating over its index.
# .copy() yields an independent frame so later in-place edits do not warn.
df = df[df['Do_Not_Contact'] != 'Y'].copy()

# Display the updated DataFrame (rich display, consistent with the other cells)
df


    CustomerID First_Name   Last_Name  Phone_Number  \
0         1001      Frodo     Baggins  1234567890.0   
2         1003     Walter       White                 
4         1005        Jon        Snow  8766783469.0   
6         1007       Jeff      Winger                 
7         1008   Sherlock      Holmes  8766783469.0   
8         1009    Gandalf                             
9         1010      Peter      Parker  1235455421.0   
10        1011    Samwise      Gamgee                 
11        1012      Harry      Potter                 
12        1013        Don      Draper  1235432345.0   
13        1014     Leslie       Knope  8766783469.0   
14        1015       Toby  Flenderson  3047622467.0   
15        1016        Ron     Weasley  1235455421.0   
16        1017   Michael        Scott  1236439775.0   
17        1018      Clark        Kent                 
19        1020     Anakin   Skywalker  8766783469.0   

                           Address Paying Customer Do_Not_Contac

In [None]:
# Keep only the rows whose 'Phone_Number' is not the empty string ('').
# A boolean mask filters all rows in one vectorised step, replacing the
# row-by-row loop that dropped from 'df' while iterating over its index.
# .copy() yields an independent frame so later in-place edits do not warn.
df = df[df['Phone_Number'] != ''].copy()

# Display the updated DataFrame (rich display, consistent with the other cells)
df


    CustomerID First_Name   Last_Name  Phone_Number  \
0         1001      Frodo     Baggins  1234567890.0   
4         1005        Jon        Snow  8766783469.0   
7         1008   Sherlock      Holmes  8766783469.0   
9         1010      Peter      Parker  1235455421.0   
12        1013        Don      Draper  1235432345.0   
13        1014     Leslie       Knope  8766783469.0   
14        1015       Toby  Flenderson  3047622467.0   
15        1016        Ron     Weasley  1235455421.0   
16        1017   Michael        Scott  1236439775.0   
19        1020     Anakin   Skywalker  8766783469.0   

                           Address Paying Customer Do_Not_Contact  
0            123 Shire Lane, Shire               Y              N  
4                 123 Dragons Road               Y              N  
7                    98 Clue Drive               N              N  
9       25th Main Street, New York               Y              N  
12                2039 Main Street               Y    

In [None]:
# Renumber the rows 0..n-1 without keeping the old index as a column.
# reset_index returns a new DataFrame, so the result must be assigned back;
# calling it without assignment left the gapped index (0, 4, 7, ...) in place.
df = df.reset_index(drop=True)

# Display the updated DataFrame
df

Unnamed: 0,CustomerID,First_Name,Last_Name,Phone_Number,Address,Paying Customer,Do_Not_Contact
0,1001,Frodo,Baggins,1234567890.0,"123 Shire Lane, Shire",Y,N
4,1005,Jon,Snow,8766783469.0,123 Dragons Road,Y,N
7,1008,Sherlock,Holmes,8766783469.0,98 Clue Drive,N,N
9,1010,Peter,Parker,1235455421.0,"25th Main Street, New York",Y,N
12,1013,Don,Draper,1235432345.0,2039 Main Street,Y,N
13,1014,Leslie,Knope,8766783469.0,343 City Parkway,Y,N
14,1015,Toby,Flenderson,3047622467.0,214 HR Avenue,N,N
15,1016,Ron,Weasley,1235455421.0,2395 Hogwarts Avenue,N,N
16,1017,Michael,Scott,1236439775.0,"121 Paper Avenue, Pennsylvania",Y,N
19,1020,Anakin,Skywalker,8766783469.0,"910 Tatooine Road, Tatooine",Y,N
