In [2]:
import pandas as pd
from pathlib import Path
import requests
from datetime import datetime
from config import api_key
from meteostat import Point, Monthly



# File to Load (Remember to Change These)
wine_data = Path("Resources/winedataset-proj3.csv")

In [3]:
wine_df = pd.read_csv(wine_data)
wine_df.head(10)

Unnamed: 0.1,Unnamed: 0,country,points,price,province,title,variety,winery
0,0,Italy,87,,Sicily & Sardinia,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,87,15.0,Douro,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,87,14.0,Oregon,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,87,13.0,Michigan,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,87,65.0,Oregon,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
5,5,Spain,87,15.0,Northern Spain,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
6,6,Italy,87,16.0,Sicily & Sardinia,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
7,7,France,87,24.0,Alsace,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer,Trimbach
8,8,Germany,87,12.0,Rheinhessen,Heinz Eifel 2013 Shine Gewürztraminer (Rheinhe...,Gewürztraminer,Heinz Eifel
9,9,France,87,27.0,Alsace,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam


In [14]:
#Creating a list of unique provinces
province_list = wine_df["province"].unique().tolist()

In [4]:
#Creating a list of unique provinces
province_list = wine_df["province"].unique().tolist()

#Iterating through our list and appending values
city_list = []

for city in province_list:
    print(city)
    city_list.append(city)

Sicily & Sardinia
Douro
Oregon
Michigan
Northern Spain
Alsace
Rheinhessen
California
Mosel
Other
Mendoza Province
Virginia
Beaujolais
Colchagua Valley
Southern Italy
Maule Valley
Bordeaux
Maipo Valley
Washington
Central Italy
Champagne
Burgundy
South Australia
Tejo
Rapel Valley
Galicia
France Other
Tuscany
Burgenland
New York
Leyda Valley
Piedmont
Stellenbosch
Simonsberg-Stellenbosch
Walker Bay
Alentejano
Central Spain
Southwest France
Aconcagua Valley
Loncomilla Valley
Marlborough
Northeastern Italy
Casablanca Valley
Veneto
Western Cape
Judean Hills
Alentejo
Coastal Region
Rhône Valley
Galilee
Beira Atlantico
Tokaj
Leithaberg
Santorini
Kremstal
Catalonia
Recas
Hawke's Bay
Curicó Valley
Limarí Valley
Colchagua Costa
Languedoc-Roussillon
Provence
Victoria
Rheingau
Tokaji
Naoussa
Valle de Guadalupe
Central Valley
Lontué Valley
Italy Other
Weinviertel
Thermenregion
Niederösterreich
Wagram
Loire Valley
Lombardy
Ontario
Österreichischer Sekt
Kamptal
Steiermark
Südsteiermark
Crete
Vinho Verd

In [16]:
# Set the API base URL
# https://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={API key}
url = "https://api.openweathermap.org/data/2.5/weather?" # YOUR CODE HERE
units = 'metric'

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(city_list):
        
    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    city_url = f"{url}appid={api_key}&units={units}&q={city}"   # YOUR CODE HERE
    
    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(city_url).json() 

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']             
        

        # Append the City information into city_data list
        city_data.append({"City": city, 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          })

    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass
              
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | Sicily & Sardinia
City not found. Skipping...
Processing Record 2 of Set 1 | Douro
Processing Record 3 of Set 1 | Oregon
Processing Record 4 of Set 1 | Michigan
Processing Record 5 of Set 1 | Northern Spain
City not found. Skipping...
Processing Record 6 of Set 1 | Alsace
Processing Record 7 of Set 1 | Rheinhessen
City not found. Skipping...
Processing Record 8 of Set 1 | California
Processing Record 9 of Set 1 | Mosel
Processing Record 10 of Set 1 | Other
City not found. Skipping...
Processing Record 11 of Set 1 | Mendoza Province
Processing Record 12 of Set 1 | Virginia
Processing Record 13 of Set 1 | Beaujolais
Processing Record 14 of Set 1 | Colchagua Valley
City not found. Skipping...
Processing Record 15 of Set 1 | Southern Italy
City not found. Skipping...
Processing Record 16 of Set 1 | Maule Valley
City not found. Skipping...
Processing Record 17 of Set 1 | Bordeaux
Processing Record 18 

In [17]:
# Convert the cities weather data into a Pandas DataFrame
city_data_df = pd.DataFrame(city_data) # YOUR CODE HERE

# Show Record Count
city_data_df.count()

City    220
Lat     221
Lng     221
dtype: int64

In [18]:
# Display sample data
city_data_df.head()

Unnamed: 0,City,Lat,Lng
0,Douro,44.3834,-78.1995
1,Oregon,44.0001,-120.5014
2,Michigan,44.2503,-85.5003
3,Alsace,48.5,7.5
4,California,38.3004,-76.5074


In [19]:
#Function to retrieve weather data from meteostat given lat/lng coordinates
def get_weather_data(lat, lng):
    location = Point(lat, lng) # Creating a Point object for the given coordinates
    try:
        data = Monthly(location, start, end)
        data = data.fetch() # Fetching monthly weather data
        return data[['tavg', 'prcp']] # Returning average temp and total precipitation
    except:
        return None

# Define the time period for the data
start = datetime(2023, 1, 1)
end = datetime(2023, 12, 31)

# Initialize lists to hold the weather data
avg_temps = []
total_prcps = []

# Iterate over the rows in the dataframe
for index, row in city_data_df.iterrows():
    weather_data = get_weather_data(row['Lat'], row['Lng'])
    if weather_data is not None and not weather_data.empty:
        avg_temps.append(weather_data['tavg'].mean())
        total_prcps.append(weather_data['prcp'].sum())
    else:
        avg_temps.append(None)
        total_prcps.append(None)

# Add the weather data to the dataframe
city_data_df['Avg_Temp_2023'] = avg_temps
city_data_df['Total_Prcp_2023'] = total_prcps

print(city_data_df)

           City      Lat       Lng  Avg_Temp_2023  Total_Prcp_2023
0         Douro  44.3834  -78.1995       8.150000            889.5
1        Oregon  44.0001 -120.5014            NaN              NaN
2      Michigan  44.2503  -85.5003       8.718182            751.0
3        Alsace  48.5000    7.5000      12.983333            535.4
4    California  38.3004  -76.5074      15.381818            780.2
..          ...      ...       ...            ...              ...
216         Krk  45.0258   14.5731      16.645455           2190.0
217     Arcadia  34.1397 -118.0353      17.845455            878.7
218    Kathikas  34.9125   32.4258      21.127273            421.5
219      Lesbos  39.1667   26.3333      19.581818            648.4
220   Neuchâtel  46.9167    6.8333      12.375000            985.2

[221 rows x 5 columns]


In [20]:
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Avg_Temp_2023,Total_Prcp_2023
0,Douro,44.3834,-78.1995,8.15,889.5
1,Oregon,44.0001,-120.5014,,
2,Michigan,44.2503,-85.5003,8.718182,751.0
3,Alsace,48.5,7.5,12.983333,535.4
4,California,38.3004,-76.5074,15.381818,780.2
5,Mosel,50.7833,12.4667,10.633333,753.8
6,Mendoza Province,-34.5,-68.5,16.658333,337.5
7,Virginia,37.5481,-77.4467,16.175,1105.1
8,Beaujolais,29.7858,-90.7681,22.072727,1172.4
9,Bordeaux,44.8404,-0.5805,15.133333,1111.4


In [21]:
# Exporting extracted data to CSV for further processing
city_data_df.to_csv('Resources/CityData_raw.csv', index=False)
wine_df.to_csv('Resources/WineData_raw.csv', index=False)