In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats

In [2]:
# Step 1: Load the 'Table' sheet of the wine/weather workbook.
# The relative path uses a forward slash so it resolves on Windows, macOS and
# Linux alike; a backslash is only treated as a separator on Windows and would
# make the load fail with FileNotFoundError elsewhere.
file_path = r'Resources/Wine_Country_Weather_Data_adj.xlsx'
wine_country_weather_data_df = pd.read_excel(file_path, sheet_name='Table')

# Step 2: Print the first five rows of the DataFrame to verify loading
print(wine_country_weather_data_df.head())

  Continent Region/Country Product     Variable  Year     Unit  Quantity  \
0      Asia    Afghanistan    Wine  Consumption  1995  1000 hl         0   
1      Asia    Afghanistan    Wine      Imports  1995  1000 hl         0   
2      Asia    Afghanistan    Wine   Production  1995  1000 hl         0   
3      Asia    Afghanistan    Wine  Consumption  1996  1000 hl         0   
4      Asia    Afghanistan    Wine      Imports  1996  1000 hl         0   

  Country_Code  DP10  DP1X  ...  DX90  EMNT  EMXP  EMXT  HTDD  MNPN  MXPN  \
0           AF     0     0  ...     0   0.0   0.0   0.0     0     0     0   
1           AF     0     0  ...     0   0.0   0.0   0.0     0     0     0   
2           AF     0     0  ...     0   0.0   0.0   0.0     0     0     0   
3           AF     0     0  ...     0   0.0   0.0   0.0     0     0     0   
4           AF     0     0  ...     0   0.0   0.0   0.0     0     0     0   

   PRCP  TAVG  TMAX  
0   0.0   0.0   0.0  
1   0.0   0.0   0.0  
2   0.0   0.0 

In [3]:
# Let pandas show up to 60 characters per cell before truncating with '...',
# so more of each glossary definition is readable in the printout
pd.set_option('display.max_colwidth', 60)

# Read the 'Glossary' sheet of the weather workbook
file_path = r'Resources/Wine_Country_Weather_Data_adj.xlsx'
glossary_df = pd.read_excel(io=file_path, sheet_name='Glossary')

# Print the complete glossary (every row, not just a preview)
print(glossary_df)

   DataType                                                   Definition
0      DP10                              Number of days with >= 0.1 inch
1      DP1X                             Number of days with >= 1.00 inch
2      DT32  Number of days with maximum temperature <= 32 degrees Fa...
3      DX70  Number of days with maximum temperature >= 70 degrees Fa...
4      DX90  Number of days with maximum temperature >= 90 degrees Fa...
5      EMNT  Extreme minimum temperature for month. Lowest daily mini...
6      EMXP  Highest daily total of precipitation in the month. Given...
7      EMXT  Extreme maximum temperature for month. Highest daily max...
8      HTDD  Heating Degree Days. Computed when daily average tempera...
9      MNPN  Monthly Mean Minimum Temperature of evaporation pan wate...
10     MXPN  Monthly Mean Maximum Temperature of evaporation pan wate...
11     PRCP   Total Monthly Precipitation. Given in inches or millime...
12     TAVG  Average Monthly Temperature. Computed 

In [9]:
# Convert the 'Quantity' column to gallons.
# The values are expressed in units of 1000 hl, and 1 hectoliter (hl) is
# approximately 26.4172 gallons, so:
#     gallons = Quantity * 1000 * 26.4172
conversion_factor = 26.4172

# Adds the new column to the loaded DataFrame itself (appended as the last column)
wine_country_weather_data_df['Quantity_gallons'] = (
    wine_country_weather_data_df['Quantity']
    .astype(float)
    .mul(1000)
    .mul(conversion_factor)
)

# Build a column order with 'Quantity_gallons' immediately after 'Quantity':
# note where 'Quantity' sits, take the new label out of the list, then
# re-insert it one position past 'Quantity'
columns = wine_country_weather_data_df.columns.tolist()
quantity_index = columns.index('Quantity')
columns.remove('Quantity_gallons')
columns.insert(quantity_index + 1, 'Quantity_gallons')

# Select the columns in the new order to produce the final DataFrame
wine_country_weather_data_final_df = wine_country_weather_data_df[columns]

# Print the first five rows of the reordered DataFrame
print(wine_country_weather_data_final_df.head(n=5))

  Continent Region/Country Product     Variable  Year     Unit  Quantity  \
0      Asia    Afghanistan    Wine  Consumption  1995  1000 hl         0   
1      Asia    Afghanistan    Wine      Imports  1995  1000 hl         0   
2      Asia    Afghanistan    Wine   Production  1995  1000 hl         0   
3      Asia    Afghanistan    Wine  Consumption  1996  1000 hl         0   
4      Asia    Afghanistan    Wine      Imports  1996  1000 hl         0   

   Quantity_gallons Country_Code  DP10  ...  DX90  EMNT  EMXP  EMXT  HTDD  \
0               0.0           AF     0  ...     0   0.0   0.0   0.0     0   
1               0.0           AF     0  ...     0   0.0   0.0   0.0     0   
2               0.0           AF     0  ...     0   0.0   0.0   0.0     0   
3               0.0           AF     0  ...     0   0.0   0.0   0.0     0   
4               0.0           AF     0  ...     0   0.0   0.0   0.0     0   

   MNPN  MXPN  PRCP  TAVG  TMAX  
0     0     0   0.0   0.0   0.0  
1     0     