In [1]:
import pandas as pd
import os
import openpyxl

In [None]:
# Read the Excel workbook into a DataFrame.
# header=None: the sheet's first row is read as data rather than as column names,
# so the columns get default integer labels.
df = pd.read_excel('03-12.xlsx', header=None)
df.head(15)

In [None]:
# Spread the key/value rows into one column per key: the first column of `df`
# supplies the new column labels and the second column supplies the cell values.
key_col = df.columns[0]
value_col = df.columns[1]
df_pivoted = df.pivot(columns=key_col, values=value_col)
df_pivoted.head()

In [None]:
# Give the pivoted columns clean names.
# NOTE(review): the names are applied by position, so this presumably relies on the
# pivoted columns coming out in exactly this order — verify against the pivot output.
df_pivoted = df_pivoted.set_axis(
    ['Address', 'City', 'Country', 'Name', 'Postal_code', 'Province', 'Telephone'],
    axis='columns',
)

df_pivoted.head(17)

In [None]:
# Inspect the last 10 rows of the pivoted frame.
df_pivoted.tail(10)

In [None]:
# Discard rows in which every column is NaN, then re-check the tail.
df_pivoted = df_pivoted.dropna(how='all', axis='index')
df_pivoted.tail(10)

In [None]:
# Show the (rows, columns) dimensions of the pivoted DataFrame.
df_pivoted.shape

In [None]:
# Back-fill missing values: each NaN takes the next non-null value found at most
# 6 rows further down in the same column.
# NOTE(review): per the original note, records start at row labels 0, 7, 13, 20, 27 —
# presumably the first row of each record then holds all of its fields; verify.
# DataFrame.bfill(limit=...) is used instead of fillna(method="bfill", ...), whose
# `method` argument is deprecated in recent pandas releases.
df_filled = df_pivoted.bfill(limit=6)
df_filled.tail(10)

In [None]:
# Disabled alternative: select the record rows by explicit positions instead of a step.
# NOTE(review): the text below is a bare string literal, not a comment — the selection
# inside it is never executed, and as the cell's last expression the string itself is
# what the cell evaluates to.
"""selected_rows = df_filled.iloc[[0, 7, 13, 20, 27]]
selected_rows"""

In [None]:
# Keep every 7th row (positions 0, 7, 14, ...) of the back-filled frame.
# NOTE(review): assumes each record occupies exactly 7 consecutive rows — confirm.
# .copy() makes this an independent frame, so the column assignments made to it in
# later cells neither trigger SettingWithCopyWarning nor write back into df_filled.
df_selected_rows = df_filled.iloc[::7].copy()
df_selected_rows.tail()

In [None]:
# Strip leading/trailing whitespace (including '\n') from 'Name' and 'Address'.
# Whitespace inside the text is left untouched.
# assign() builds a new frame instead of writing through .loc into what may be a
# slice of df_filled, which avoids SettingWithCopyWarning.
df_selected_rows = df_selected_rows.assign(
    Name=df_selected_rows['Name'].str.strip(),
    Address=df_selected_rows['Address'].str.strip(),
)
df_selected_rows.tail()

In [None]:
# Check the dtype of each column.
df_selected_rows.dtypes

In [None]:
# Target left-to-right layout for the columns
desired_columns = [
    'Name',
    'Address',
    'City',
    'Province',
    'Postal_code',
    'Country',
    'Telephone',
]

# Rearrange the frame to that layout
df_selected_rows = df_selected_rows.reindex(desired_columns, axis='columns')

# Preview the rearranged frame
df_selected_rows.head()

In [None]:
# Replace the existing row labels with a fresh default index (old labels are discarded)
df_selected_rows = df_selected_rows.reset_index(drop=True)

# Preview the re-indexed frame
df_selected_rows.head()

In [None]:
# Dimensions (rows, columns) of the selected rows.
df_selected_rows.shape

In [None]:
# Load the second data source: parse the HTML tables found at the URL
# (this fetches the page over the network) and print how many tables were returned.
url = 'https://countrycode.org/'
tables = pd.read_html(url)
print(len(tables))

In [None]:
# Use the first parsed table as the country-code lookup.
# NOTE(review): assumes tables[0] is the table carrying the 'COUNTRY' and
# 'COUNTRY CODE' columns used by later cells — confirm.
df_code = tables[0]
df_code.head()

In [41]:
# Lower-case the lookup table's merge key.
# NOTE(review): only df_code["COUNTRY"] is normalized in this cell; the other merge
# key (df_selected_rows["Country"]) is not touched here, so it presumably must
# already be lower-case for the join to match — verify.
df_code["COUNTRY"] = df_code["COUNTRY"].str.lower()

In [None]:
# Left-join the selected rows to the country-code lookup on the country name.
# Both sides are matched on a temporary, normalized key (trimmed + lower-cased) so
# that values differing only in letter case or surrounding whitespace still match
# instead of silently yielding NaN country codes. The temporary key is dropped
# again, so the resulting columns are the left columns followed by the lookup columns.
df_merged = pd.merge(
    df_selected_rows.assign(_country_key=df_selected_rows["Country"].str.strip().str.lower()),
    df_code.assign(_country_key=df_code["COUNTRY"].str.strip().str.lower()),
    on="_country_key",
    how="left",
).drop(columns="_country_key")
df_merged.tail()

In [None]:
# Drop the lookup-table columns that are not needed, keeping only 'COUNTRY CODE'.
# The columns are chosen by name from df_code rather than as "the last four columns",
# and labels that are already gone are ignored, so re-running the cell neither raises
# KeyError nor removes additional columns.
lookup_only_columns = [name for name in df_code.columns if name != 'COUNTRY CODE']
df_merged = df_merged.drop(columns=lookup_only_columns, errors='ignore')
df_merged.head()


In [None]:
# Target layout for the merged frame, with the country code placed before the phone number
desired_columns_final = [
    'Name',
    'Address',
    'City',
    'Province',
    'Postal_code',
    'Country',
    'COUNTRY CODE',
    'Telephone',
]

# Build df_final as a re-ordered copy of df_merged
# NOTE(review): the later visible cells keep working on df_merged, not df_final.
df_final = df_merged.reindex(desired_columns_final, axis='columns')

# Preview the re-ordered frame
df_final.head()

In [None]:
# Combine 'COUNTRY CODE' and 'Telephone' into one 'Telephone_code' column of the
# form '<COUNTRY CODE>-<Telephone>'.
country_code = df_merged['COUNTRY CODE']
telephone = df_merged['Telephone']
telephone_text = telephone.astype(str)

# astype(str) renders missing values as the literal text 'nan', so guard both sides:
#   - no country code -> keep the telephone number on its own (no 'nan-' prefix)
#   - no telephone    -> leave the result missing instead of '<code>-nan'
df_merged['Telephone_code'] = (
    (country_code.astype(str) + '-' + telephone_text)
    .where(country_code.notna(), telephone_text)
    .where(telephone.notna())
)

# Drop the 'COUNTRY CODE' and 'Telephone' columns now that they are combined
df_merged.drop(columns=['COUNTRY CODE', 'Telephone'], inplace=True)

df_merged.head()

In [None]:
# Write the merged frame to 'output_data.csv', leaving out the row index
df_merged.to_csv('output_data.csv', index=False)