## **Import Libraries and Data**

In [1]:
import numpy as np
import pandas as pd

## **Task: Preprocess the data for further analysis**

In [2]:
# Load the raw customer records from the Excel workbook

source_workbook = "Jumbled-up-Customers-Details.xlsx"
data = pd.read_excel(source_workbook)
data

Unnamed: 0,Name Hussein Hakeem Address Number 22 Fioye Crescent Surulere Lagos Age 17 Gender Male
0,Name Arojoye Samuel Address 11 Omolade Close O...
1,"Name Alex Ezurum Address 1 Adamu Lane, Abuja A..."
2,Name Susan Nwaimo Address Number 58 Yaba Stree...
3,Name Ajao Opeyemi Address No12 Olubunmi Street...
4,Name Banjoko Adebusola Address 34 Ngige Street...
5,"Name Muhammed Olabisi Address 13, ICAN road, E..."
6,"Name Oluwagbemi Mojisola Address ACCA Lane, On..."


### **Solution:**
- We have to create the following columns:
    - Name
    - Address
    - Age
    - Gender
1. Move the column header down into the rows, because it is data, not metadata.
2. Use the `re.search` function to match a specific pattern and capture its groups as separate columns.
3. Create a function that does all this.

In [3]:
# The first customer record was read in as the column header, so append it
# back as a data row. Concatenating with ignore_index=True gives the new row
# the next free position and renumbers the index in one step; a hard-coded
# label (such as 10) would silently overwrite an existing record as soon as
# the sheet holds more than ten rows.
header_record = pd.DataFrame({data.columns[0]: [data.columns[0]]})
data = pd.concat([data, header_record], ignore_index=True)

# Rename the column header
data = data.rename(columns={data.columns[0]: "Personal Details"})

data

Unnamed: 0,Personal Details
0,Name Arojoye Samuel Address 11 Omolade Close O...
1,"Name Alex Ezurum Address 1 Adamu Lane, Abuja A..."
2,Name Susan Nwaimo Address Number 58 Yaba Stree...
3,Name Ajao Opeyemi Address No12 Olubunmi Street...
4,Name Banjoko Adebusola Address 34 Ngige Street...
5,"Name Muhammed Olabisi Address 13, ICAN road, E..."
6,"Name Oluwagbemi Mojisola Address ACCA Lane, On..."
7,Name Hussein Hakeem Address Number 22 Fioye Cr...


In [4]:
# Show every record in full, without column-width truncation

untruncated_view = data.to_string()
print(untruncated_view)

                                                                         Personal Details
0      Name Arojoye Samuel Address 11 Omolade Close Omole Estate Lagos Age 16 Gender Male
1                         Name Alex Ezurum Address 1 Adamu Lane, Abuja Age 14 Gender Male
2     Name Susan Nwaimo Address Number 58 Yaba Street, Kaduna State  Age 16 Gender Female
3           Name Ajao Opeyemi Address No12 Olubunmi Street, Abeokuta Age 18 Gender Female
4      Name Banjoko Adebusola Address 34 Ngige Street, Ugheli, Delta Age 14 Gender Female
5                 Name Muhammed Olabisi Address 13, ICAN road, Enugu Age 12 Gender Female
6                Name Oluwagbemi Mojisola Address ACCA Lane, Onitsha Age 13 Gender Female
7  Name Hussein Hakeem Address Number 22 Fioye Crescent Surulere Lagos Age 17 Gender Male


In [5]:
# Create a function using regex
import re

# Expected layout of one record:
#   "Name <name> Address <address> Age <digits> Gender <gender>"
# `\s+` between tokens tolerates repeated whitespace (e.g. "Kaduna State  Age 16"),
# which would otherwise leak into the captured address as a trailing space.
_DETAILS_PATTERN = re.compile(
    r"Name\s+(.+?)\s+Address\s+(.+?)\s+Age\s+(\d+)\s+Gender\s+(.+)"
)


def extract_deails(row):
    """Split one jumbled "Personal Details" string into its four fields.

    Parameters
    ----------
    row : str
        A single cell value of the form
        "Name ... Address ... Age <digits> Gender ...".

    Returns
    -------
    pd.Series
        Four positional values: name, address, age (digits as a string) and
        gender, each with surrounding whitespace removed. When `row` is not a
        string (for example NaN from an empty cell) or does not contain the
        expected pattern, all four values are None.
    """
    # Empty Excel cells arrive as NaN (a float); re.search would raise on them.
    if not isinstance(row, str):
        return pd.Series([None, None, None, None])

    match = _DETAILS_PATTERN.search(row)
    if match is None:
        return pd.Series([None, None, None, None])

    # Strip each capture so stray spaces around a field never reach the output.
    return pd.Series([field.strip() for field in match.groups()])

In [6]:
# Apply function: one new column per extracted field
data[["name", "address", "age", "gender"]] = data["Personal Details"].apply(extract_deails)

# The regex yields age as text; convert it so it is numeric for further
# analysis and is written to Excel as a number rather than a string.
data["age"] = pd.to_numeric(data["age"])

# Drop Personal Details column (rebinding instead of mutating in place)
data = data.drop(columns="Personal Details")

data

Unnamed: 0,name,address,age,gender
0,Arojoye Samuel,11 Omolade Close Omole Estate Lagos,16,Male
1,Alex Ezurum,"1 Adamu Lane, Abuja",14,Male
2,Susan Nwaimo,"Number 58 Yaba Street, Kaduna State",16,Female
3,Ajao Opeyemi,"No12 Olubunmi Street, Abeokuta",18,Female
4,Banjoko Adebusola,"34 Ngige Street, Ugheli, Delta",14,Female
5,Muhammed Olabisi,"13, ICAN road, Enugu",12,Female
6,Oluwagbemi Mojisola,"ACCA Lane, Onitsha",13,Female
7,Hussein Hakeem,Number 22 Fioye Crescent Surulere Lagos,17,Male


In [7]:
# Save the transformed data as an extra sheet in the original workbook

with pd.ExcelWriter(
    "Jumbled-up-Customers-Details.xlsx",
    engine="openpyxl",
    mode="a",
    # In append mode the default is to raise if the sheet already exists, which
    # makes a second run of the notebook fail; overwrite the sheet instead.
    if_sheet_exists="replace",
) as writer:
    data.to_excel(writer, sheet_name="Transformed_data", index=False)