In [1]:
import pandas as pd

In [2]:
# Read historic weather data in CSVs to a DataFrame

# Declare empty DataFrame
historical_weather_df = pd.DataFrame()

file_paths = ("historical_weather_france.csv", "historical_weather_italy.csv", "historical_weather_spain.csv", "historical_weather_us.csv")

for path in file_paths:
    temp_df = pd.read_csv(path)
    historical_weather_df = pd.concat([historical_weather_df, temp_df])

historical_weather_df

Unnamed: 0,Province,Year,Historical Precip,Historical Temp
0,Alsace,1901,864.18,8.41
1,Alsace,1902,737.39,8.57
2,Alsace,1903,757.38,8.90
3,Alsace,1904,704.56,9.39
4,Alsace,1905,840.87,8.72
...,...,...,...,...
475,Washington,2016,1103.25,9.64
476,Washington,2017,1174.94,8.55
477,Washington,2018,910.29,9.17
478,Washington,2019,810.17,8.31


In [3]:
# Save historical_weather_df to CSV
historical_weather_df.to_csv('historical_weather.csv', index=False)

In [4]:
# Read projected weather data in CSVs to a DataFrame

# Declare empty DataFrame
projected_weather_df = pd.DataFrame()

file_paths = ("projected_weather_france.csv", "projected_weather_italy.csv", "projected_weather_spain.csv", "projected_weather_us.csv")

for path in file_paths:
    temp_df = pd.read_csv(path)
    projected_weather_df = pd.concat([projected_weather_df, temp_df])

projected_weather_df

Unnamed: 0,Province,Year,Projected Precip,Projected Temp
0,Alsace,2006,1236.97,9.51
1,Alsace,2007,1250.60,9.61
2,Alsace,2008,1233.63,9.83
3,Alsace,2009,1274.26,10.06
4,Alsace,2010,1284.61,9.38
...,...,...,...,...
375,Washington,2096,1232.57,12.87
376,Washington,2097,1227.24,12.36
377,Washington,2098,1286.83,12.84
378,Washington,2099,1199.20,12.91


In [5]:
# Save projected_weather_df to CSV
projected_weather_df.to_csv('projected_weather.csv', index=False)

In [6]:
# Add "Timeseries" column to ID data in historical DF as Historical
historical_weather_df['Timeseries'] = pd.Series(['Historical' for x in range(len(historical_weather_df.index))])

# Rename Historical Precip and Historical Temp columns
historical_weather_df = historical_weather_df.rename(columns={'Historical Precip': 'Precipitation', 'Historical Temp': 'Temperature'})

historical_weather_df

Unnamed: 0,Province,Year,Precipitation,Temperature,Timeseries
0,Alsace,1901,864.18,8.41,Historical
1,Alsace,1902,737.39,8.57,Historical
2,Alsace,1903,757.38,8.90,Historical
3,Alsace,1904,704.56,9.39,Historical
4,Alsace,1905,840.87,8.72,Historical
...,...,...,...,...,...
475,Washington,2016,1103.25,9.64,Historical
476,Washington,2017,1174.94,8.55,Historical
477,Washington,2018,910.29,9.17,Historical
478,Washington,2019,810.17,8.31,Historical


In [7]:
# Add "Timeseries" column to ID data in projected DF as Projected
projected_weather_df['Timeseries'] = pd.Series(['Projected' for x in range(len(projected_weather_df.index))])

# Rename Projected Precip and Projected Temp columns
projected_weather_df = projected_weather_df.rename(columns={'Projected Precip': 'Precipitation', 'Projected Temp': 'Temperature'})

projected_weather_df

Unnamed: 0,Province,Year,Precipitation,Temperature,Timeseries
0,Alsace,2006,1236.97,9.51,Projected
1,Alsace,2007,1250.60,9.61,Projected
2,Alsace,2008,1233.63,9.83,Projected
3,Alsace,2009,1274.26,10.06,Projected
4,Alsace,2010,1284.61,9.38,Projected
...,...,...,...,...,...
375,Washington,2096,1232.57,12.87,Projected
376,Washington,2097,1227.24,12.36,Projected
377,Washington,2098,1286.83,12.84,Projected
378,Washington,2099,1199.20,12.91,Projected


In [8]:
combined_weather_df = pd.concat([historical_weather_df, projected_weather_df])
combined_weather_df

Unnamed: 0,Province,Year,Precipitation,Temperature,Timeseries
0,Alsace,1901,864.18,8.41,Historical
1,Alsace,1902,737.39,8.57,Historical
2,Alsace,1903,757.38,8.90,Historical
3,Alsace,1904,704.56,9.39,Historical
4,Alsace,1905,840.87,8.72,Historical
...,...,...,...,...,...
375,Washington,2096,1232.57,12.87,Projected
376,Washington,2097,1227.24,12.36,Projected
377,Washington,2098,1286.83,12.84,Projected
378,Washington,2099,1199.20,12.91,Projected


In [9]:
# Additional edits after wine reviews EDA
# Remove Sardinia/Sardegna data
combined_weather_df = combined_weather_df[combined_weather_df.Province != 'Sardegna']

# Rename Toscana and Bourgogne so they match the corresponding province names in wine reviews
combined_weather_df.loc[combined_weather_df['Province']=='Toscana', 'Province']="Tuscany"
combined_weather_df.loc[combined_weather_df['Province']=='Bourgogne', 'Province']="Burgundy"

# Rename Province and Year for SQL join
combined_weather_df = combined_weather_df.rename(columns={'Province': 'Prov_Weather', 'Year': 'Year_Weather'})

combined_weather_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,Prov_Weather,Year_Weather,Precipitation,Temperature,Timeseries
0,Alsace,1901,864.18,8.41,Historical
1,Alsace,1902,737.39,8.57,Historical
2,Alsace,1903,757.38,8.90,Historical
3,Alsace,1904,704.56,9.39,Historical
4,Alsace,1905,840.87,8.72,Historical
...,...,...,...,...,...
375,Washington,2096,1232.57,12.87,Projected
376,Washington,2097,1227.24,12.36,Projected
377,Washington,2098,1286.83,12.84,Projected
378,Washington,2099,1199.20,12.91,Projected


In [10]:
# Save projected_weather_df to CSV
combined_weather_df.to_csv('combined_weather.csv', index=False)