## Isolating weather Stations in wine areas

In [3]:
import pandas as pd

## Import Data Frame

In [4]:
# Load the CSV file
file_path = 'Resources/weather_all_country_codes3.csv'
df = pd.read_csv(file_path)


## Pivot the data Frame

In [5]:
# Step 1: Pivot the DataFrame
pivoted_df = df.pivot_table(index=['station', 'date'], columns='datatype', values='value').reset_index()


## Update the country values

In [6]:
# Step 2: Update the 'country' values
pivoted_df['country'] = pivoted_df['station'].str[:2]



## Load the station code

In [7]:
# Define the column names for station codes
column_names = ['station_id', 'latitude', 'longitude', 'elevation', 'location']

# Define the column widths for fixed-width format
colspecs = [(0, 11), (12, 20), (21, 30), (31, 37), (38, None)]

# Read the text file into a DataFrame using fixed-width format
file_path_txt = 'Resources/stationcodes.txt'
df_station_codes = pd.read_fwf(file_path_txt, colspecs=colspecs, header=None, names=column_names)



## Rename Columns for Merging

In [8]:
# Rename the 'station_id' column to 'station' for merging
df_station_codes.rename(columns={'station_id': 'station'}, inplace=True)


## Clean Column Names

In [9]:
# Remove any leading or trailing whitespaces from column names
pivoted_df.columns = pivoted_df.columns.str.strip()
df_station_codes.columns = df_station_codes.columns.str.strip()


## Merge the DataFrames

In [10]:
# Merge the cleaned weather DataFrame with the station codes DataFrame
merged_df = pd.merge(pivoted_df, df_station_codes, on='station', how='left')


## Display the Merged DataFrame

In [12]:
# Display the merged DataFrame
merged_df


Unnamed: 0,station,date,DP10,DP1X,DT32,DX70,DX90,EMNT,EMXP,EMXT,...,MNPN,MXPN,PRCP,TAVG,TMAX,country,latitude,longitude,elevation,location
0,GHCND:AE000041196,2012-01-01T00:00:00,,,,346.0,226.0,,,121.0,...,,,,,96.0,GH,,,,
1,GHCND:AE000041196,2023-01-01T00:00:00,8.0,1.0,,,,,1.06,,...,,,3.42,,,GH,,,,
2,GHCND:AEM00041194,2004-01-01T00:00:00,4.0,0.0,,,,,0.90,,...,,,2.31,,,GH,,,,
3,GHCND:AEM00041194,2006-01-01T00:00:00,7.0,1.0,,,,,1.77,,...,,,4.08,,,GH,,,,
4,GHCND:AEM00041194,2010-01-01T00:00:00,5.0,1.0,,,,,3.15,,...,,,4.83,,,GH,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31219,GHCND:WA010101860,2003-01-01T00:00:00,35.0,0.0,0.0,348.0,117.0,38.0,0.87,100.0,...,,,12.60,72.3,85.5,GH,,,,
31220,GHCND:WA010101860,2022-01-01T00:00:00,,,,348.0,126.0,,,99.0,...,,,,,86.5,GH,,,,
31221,GHCND:WA012084750,1998-01-01T00:00:00,,,,340.0,210.0,,,103.0,...,,,,,90.7,GH,,,,
31222,GHCND:WA012084750,1999-01-01T00:00:00,,,,343.0,132.0,,,102.0,...,,,,,88.3,GH,,,,


In [14]:
# Load the Weather Data
file_path = 'Resources/weather_all_country_codes3.csv'
df = pd.read_csv(file_path)

# Pivot the Weather DataFrame
pivoted_df = df.pivot_table(index=['station', 'date'], columns='datatype', values='value').reset_index()

# Update the 'country' values
pivoted_df['country'] = pivoted_df['station'].str[:2]

# Load the Station Codes Data
column_names = ['station_id', 'latitude', 'longitude', 'elevation', 'location']
colspecs = [(0, 11), (12, 20), (21, 30), (31, 37), (38, None)]
file_path_txt = 'Resources/stationcodes.txt'
df_station_codes = pd.read_fwf(file_path_txt, colspecs=colspecs, header=None, names=column_names)

# Rename Columns for Merging
df_station_codes.rename(columns={'station_id': 'station'}, inplace=True)

# Clean Column Names
pivoted_df.columns = pivoted_df.columns.str.strip()
df_station_codes.columns = df_station_codes.columns.str.strip()

# Merge the DataFrames
merged_df = pd.merge(pivoted_df, df_station_codes, on='station', how='left')

# Filter out the stations without location data
stations_without_location = merged_df[merged_df[['latitude', 'longitude', 'location']].isnull().any(axis=1)]

# Display the stations without location data
print(stations_without_location.head())


             station                 date  DP10  DP1X  DT32   DX70   DX90  \
0  GHCND:AE000041196  2012-01-01T00:00:00   NaN   NaN   NaN  346.0  226.0   
1  GHCND:AE000041196  2023-01-01T00:00:00   8.0   1.0   NaN    NaN    NaN   
2  GHCND:AEM00041194  2004-01-01T00:00:00   4.0   0.0   NaN    NaN    NaN   
3  GHCND:AEM00041194  2006-01-01T00:00:00   7.0   1.0   NaN    NaN    NaN   
4  GHCND:AEM00041194  2010-01-01T00:00:00   5.0   1.0   NaN    NaN    NaN   

   EMNT  EMXP   EMXT  ...  MNPN  MXPN  PRCP  TAVG  TMAX  country latitude  \
0   NaN   NaN  121.0  ...   NaN   NaN   NaN   NaN  96.0       GH      NaN   
1   NaN  1.06    NaN  ...   NaN   NaN  3.42   NaN   NaN       GH      NaN   
2   NaN  0.90    NaN  ...   NaN   NaN  2.31   NaN   NaN       GH      NaN   
3   NaN  1.77    NaN  ...   NaN   NaN  4.08   NaN   NaN       GH      NaN   
4   NaN  3.15    NaN  ...   NaN   NaN  4.83   NaN   NaN       GH      NaN   

   longitude  elevation  location  
0        NaN        NaN       NaN  
1 