In [15]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats

In [16]:
# Step 1: Load the wine dataset from the 'Export' sheet of the Excel workbook.
# The path uses forward slashes so it resolves on Windows, macOS and Linux alike
# (a backslash is a literal filename character on POSIX systems), matching the
# separator style used for the CSV export path elsewhere in this notebook.
file_path = 'Resources/Wine_data_all.xlsx'
wine_country_data_df = pd.read_excel(file_path, sheet_name='Export')

# Step 2: Display the first few rows of the DataFrame to verify loading
print(wine_country_data_df.head())

  Continent Region/Country Product     Variable    Year     Unit  Quantity
0      Asia    Afghanistan    Wine  Consumption  1995.0  1000 hl       0.0
1      Asia    Afghanistan    Wine      Imports  1995.0  1000 hl       0.0
2      Asia    Afghanistan    Wine   Production  1995.0  1000 hl       0.0
3      Asia    Afghanistan    Wine  Consumption  1996.0  1000 hl       0.0
4      Asia    Afghanistan    Wine      Imports  1996.0  1000 hl       0.0


In [17]:
# 'Quantity' is reported in units of 1000 hl; one hectolitre is 26.4172 US gallons,
# so multiplying by this factor expresses each quantity in gallons.
conversion_factor = 1000 * 26.4172

# Derive the gallons column from the raw quantity column
wine_country_data_df['Quantity (gallons)'] = wine_country_data_df['Quantity'].mul(conversion_factor)

# Preview the first rows to confirm the new column was added
print(wine_country_data_df.head())

  Continent Region/Country Product     Variable    Year     Unit  Quantity  \
0      Asia    Afghanistan    Wine  Consumption  1995.0  1000 hl       0.0   
1      Asia    Afghanistan    Wine      Imports  1995.0  1000 hl       0.0   
2      Asia    Afghanistan    Wine   Production  1995.0  1000 hl       0.0   
3      Asia    Afghanistan    Wine  Consumption  1996.0  1000 hl       0.0   
4      Asia    Afghanistan    Wine      Imports  1996.0  1000 hl       0.0   

   Quantity (gallons)  
0                 0.0  
1                 0.0  
2                 0.0  
3                 0.0  
4                 0.0  


In [18]:
# Clean the gallons column and store it as whole numbers:
#   1. treat +/-inf as missing, then replace every missing value with 0
#      (NOTE: after this step a missing quantity is indistinguishable from a true zero);
#   2. round to the nearest whole gallon;
#   3. cast to an explicit 64-bit integer. A bare `int` maps to a 32-bit integer on
#      Windows with NumPy < 2, and gallon totals above 2,147,483,647 (roughly 81,000
#      in the source's 1000 hl unit) would overflow it.
wine_country_data_df['Quantity (gallons)'] = (
    wine_country_data_df['Quantity (gallons)']
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .round()
    .astype('int64')
)

# Display the first few rows to verify the adjustment
print(wine_country_data_df.head())


  Continent Region/Country Product     Variable    Year     Unit  Quantity  \
0      Asia    Afghanistan    Wine  Consumption  1995.0  1000 hl       0.0   
1      Asia    Afghanistan    Wine      Imports  1995.0  1000 hl       0.0   
2      Asia    Afghanistan    Wine   Production  1995.0  1000 hl       0.0   
3      Asia    Afghanistan    Wine  Consumption  1996.0  1000 hl       0.0   
4      Asia    Afghanistan    Wine      Imports  1996.0  1000 hl       0.0   

   Quantity (gallons)  
0                   0  
1                   0  
2                   0  
3                   0  
4                   0  


In [19]:
# Step 1: Load the GHCND country-code lookup table from the CSV file.
# The path uses forward slashes so it resolves on Windows, macOS and Linux alike
# (a backslash is a literal filename character on POSIX systems), matching the
# separator style used for the CSV export path elsewhere in this notebook.
csv_file_path = 'Resources/ghcnd-countries.csv'
ghcnd_countries_df = pd.read_csv(csv_file_path)

# Step 2: Display the first few rows of the DataFrame to verify loading
print(ghcnd_countries_df.head())

  Code               Country
0   AC   Antigua and Barbuda
1   AE  United Arab Emirates
2   AF           Afghanistan
3   AG               Algeria
4   AJ            Azerbaijan


In [20]:
# Attach the GHCND country code to each wine record by matching on the country name.
# how='right' keeps every country listed in ghcnd_countries_df (even those with no
# wine rows) and drops wine rows whose 'Region/Country' has no exactly matching
# 'Country' name in the lookup table.
wine_country_data_code_mapping_df = pd.merge(wine_country_data_df, ghcnd_countries_df,
                                             left_on='Region/Country', right_on='Country',
                                             how='right')

# Replace missing values with blanks (empty strings) in the non-numeric columns only.
# Filling the whole frame with '' would also insert strings into the numeric columns
# and turn them into object dtype, breaking any later arithmetic, aggregation or
# plotting. Numeric gaps stay as NaN; DataFrame.to_csv writes NaN as an empty field
# by default, so an exported CSV still shows blanks for them.
text_columns = wine_country_data_code_mapping_df.select_dtypes(exclude='number').columns
wine_country_data_code_mapping_df[text_columns] = (
    wine_country_data_code_mapping_df[text_columns].fillna('')
)

# Display the first few rows of the new DataFrame to verify
print(wine_country_data_code_mapping_df.head())

  Continent       Region/Country Product     Variable    Year     Unit  \
0   America  Antigua and Barbuda    Wine  Consumption  1995.0  1000 hl   
1   America  Antigua and Barbuda    Wine      Exports  1995.0  1000 hl   
2   America  Antigua and Barbuda    Wine      Imports  1995.0  1000 hl   
3   America  Antigua and Barbuda    Wine   Production  1995.0  1000 hl   
4   America  Antigua and Barbuda    Wine  Consumption  1996.0  1000 hl   

  Quantity Quantity (gallons) Code              Country  
0      1.0            26417.0   AC  Antigua and Barbuda  
1      0.0                0.0   AC  Antigua and Barbuda  
2      2.0            52834.0   AC  Antigua and Barbuda  
3      0.0                0.0   AC  Antigua and Barbuda  
4      2.0            52834.0   AC  Antigua and Barbuda  


In [21]:
# Save the merged wine / country-code table as a CSV in the Resources folder,
# omitting the DataFrame index from the file.
output_file_path = r'Resources/wine_country_data_code_mapping.csv'
wine_country_data_code_mapping_df.to_csv(path_or_buf=output_file_path, index=False)
