In [None]:
#!pip install geopy
#!pip install pandas

from geopy.geocoders import Nominatim
import pandas as pd
from datetime import datetime
# Nominatim geocoder client, configured with a placeholder user agent string.
geolocator = Nominatim(user_agent="my_user_agent")

# Resolve the region name to a location object; the station lookup further
# down reads loc.latitude and loc.longitude from it.
loc = geolocator.geocode("Alsace, France")
print(loc)

In [None]:
#!pip install meteostat
# Import Meteostat library
from meteostat import Stations, Daily

# Station catalogue ordered around the geocoded location
stations = Stations().nearby(loc.latitude, loc.longitude)
# Take the first ten results and keep those whose distance is at most 5e5
station = stations.fetch(10).loc[lambda nearby: nearby['distance'] <= 5e5]

In [None]:
# Show the stations that passed the distance filter.
station

In [None]:
from datetime import datetime, timedelta
import numpy as np

# Period of interest
start, end = datetime(2000, 1, 1), datetime(2022, 12, 31)

# Daily records of the first listed station, with the time index turned into
# a column and only the time and temperature columns kept
data = (
    Daily(station.index.values[0], start, end)
    .fetch()
    .reset_index()[['time','tavg', 'tmin', 'tmax']]
)


In [None]:
#data['time']

# Requested period; `dates` lists every calendar day a record is expected for.
start = datetime(2000, 1, 1)
end = datetime(2022, 12, 31)
dates = pd.date_range(start=start, end=end)

weather_columns = ['time', 'tavg', 'tmin', 'tmax']

# Walk the stations in the order of the `station` table. The first station is
# asked for the whole period; every further station is only asked for the days
# *before* the earliest record collected so far, so the pieces never overlap.
frames = []
fetch_end = end  # separate cursor, so the requested `end` is never overwritten
for station_id in station.index.values:
    next_data = Daily(station_id, start, fetch_end).fetch().reset_index()[weather_columns]
    if next_data.empty:
        # Nothing recorded here for the remaining window; try the next station.
        continue
    # Older records go in front (assumes each fetch comes back ordered by time).
    frames.insert(0, next_data)
    earliest = next_data['time'].min()
    if earliest <= start:
        # The beginning of the requested period is covered; stop querying.
        break
    fetch_end = (earliest - pd.Timedelta(days=1)).to_pydatetime()

if frames:
    # Single concat with a fresh index: no duplicated labels, no quadratic growth.
    data = pd.concat(frames, ignore_index=True)
else:
    # Typed empty frame so that `.dt` accessors in later cells still work.
    data = pd.DataFrame({
        'time': pd.Series(dtype='datetime64[ns]'),
        'tavg': pd.Series(dtype='float64'),
        'tmin': pd.Series(dtype='float64'),
        'tmax': pd.Series(dtype='float64'),
    })

# Report the share of requested days that have no row at all.
missing_dates = dates[~dates.isin(data['time'])]
if len(missing_dates) > 0:
    print(f'Some dates not recorded: {round(100*len(missing_dates)/len(dates),2)}%')

In [None]:
# Inspect the frame fetched from the most recently queried station.
next_data

In [None]:
# Derive calendar year and month columns from the timestamp; the grouping
# cells that follow key on them.
data['year'] = data['time'].dt.year
data['month'] = data['time'].dt.month

In [None]:
# Mean of each temperature column for every (year, month) pair, flattened back to columns.
data_month_year = data.groupby(['year', 'month'])[['tavg', 'tmin', 'tmax']].mean().reset_index()

In [None]:
# Average the monthly means over all years to get one reference row per
# calendar month, tagging the value columns with a `_month` suffix.
data_month = (
    data_month_year
    .groupby('month')[['tavg', 'tmin', 'tmax']]
    .mean()
    .add_suffix('_month')
    .reset_index()
)
# Attach the per-month reference values to every (year, month) row.
data_month_year = data_month_year.merge(data_month, on = 'month', how = 'left')

In [None]:
# Deviation of each (year, month) average temperature from the `tavg_month` reference column.
data_month_year['tflux_month'] = data_month_year['tavg'] - data_month_year['tavg_month']

In [None]:
# Load the monthly weather table from disk.
# NOTE(review): this rebinds `data_month_year`, so the frame built in the cells
# above is no longer reachable under that name — confirm that is intended.
data_month_year = pd.read_csv('data/france_regions_weather_data.csv')

In [None]:
# Month number -> three-letter label used on the plot's categorical x axis.
# Every label is a three-letter abbreviation (the original "July" was the only
# exception), matching the 'Mar' / 'Oct' categories the plotting cell refers to.
convert_months = {
    1: "Jan",
    2: "Feb",
    3: "Mar",
    4: "Apr",
    5: "May",
    6: "Jun",
    7: "Jul",
    8: "Aug",
    9: "Sep",
    10: "Oct",
    11: "Nov",
    12: "Dec",
}
# `replace` leaves values that are not keys untouched, so re-running this cell
# on already converted labels is harmless.
data_month_year['month'] = data_month_year['month'].replace(convert_months)

In [None]:
# Show only the rows whose `location` is the Alsace region.
data_month_year[(data_month_year['location'] == 'Alsace, France')]

In [None]:
# Display the monthly table.
data_month_year

In [None]:
#!pip install plotly
#!pip install nbformat
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Monthly rows for the year and region being plotted.
plot_data = data_month_year[(data_month_year['year'] == 2022) & (data_month_year['location'] == 'Alsace, France')]

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Shade the span between the 'Mar' and 'Oct' categories over the full plot height.
fig.update_layout(
    shapes = [
        {
            'type': 'rect',
            'xref': 'x',
            'yref': 'paper',
            'x0': 'Mar',
            'y0': 0,
            'x1': 'Oct',
            'y1': 1,
            'fillcolor': '#ffffe0',
            'opacity': 0.5,
            'line': {
                'width': 0,
            }
        }
    ]
)

# Monthly maximum and minimum temperature; the traces are named so the legend
# identifies them instead of showing "trace 0" / "trace 1".
fig.add_trace(go.Scatter(x = plot_data['month'], y = plot_data['tmax'], name = 'tmax'))
fig.add_trace(go.Scatter(x = plot_data['month'], y = plot_data['tmin'], name = 'tmin'))

# White background, fixed y range, and axis titles so the figure stands alone.
fig.update_layout(
    plot_bgcolor='#FFFFFF',
    xaxis=dict(title=dict(text='Month')),
    yaxis=dict(range=[-10, 45], title=dict(text='Temperature')),
)

# Dashed markers and labels for the two ends of the shaded span.
fig.add_vline(
    x = 'Mar',
    line_dash = 'dash',
)
fig.add_annotation(
    x="Mar",
    y = 45,
    text="Growth Cycle Start",
    showarrow = False
)
fig.add_vline(
    x = 'Oct',
    line_dash = 'dash',
)
# Last expression of the cell: it returns the figure, which renders it.
fig.add_annotation(
    x = 'Oct',
    y = 45,
    text = 'Harvest',
    showarrow=False
)

In [None]:
class FetchDataFoo():
    """Collect daily temperature records for a named place from nearby stations.

    The place name is geocoded with Nominatim, stations around that location
    are looked up through ``Stations``, and each station's daily ``tavg`` /
    ``tmin`` / ``tmax`` records are downloaded through ``Daily`` for the
    requested period.

    Parameters
    ----------
    place : str
        Free-text place name passed to the geocoder.
    start_date, end_date : datetime or date-like str
        Bounds of the requested period. The values are stored unchanged and
        converted to ``datetime`` objects where they are used.
    """

    # Columns kept from every station's daily frame.
    _VALUE_COLUMNS = ['time', 'tavg', 'tmin', 'tmax']

    def __init__(self, place, start_date, end_date):
        self.place = place
        self.start_date = start_date
        self.end_date = end_date

    @staticmethod
    def _as_datetime(value):
        """Convert a datetime or date-like string to a plain ``datetime``."""
        return pd.to_datetime(value).to_pydatetime()

    def get_lat_long(self):
        """Geocode ``self.place`` and return the geocoder's result.

        The result is returned as-is; callers should be prepared for ``None``
        when the geocoder finds no match.
        """
        geolocator = Nominatim(user_agent="my_user_agent")
        return geolocator.geocode(self.place)

    def get_nearest_station_id(self, loc, search_distance = 50000):
        """Return the stations around ``loc`` within ``search_distance``.

        Parameters
        ----------
        loc : object with ``latitude`` and ``longitude`` attributes
        search_distance : number, default 50000
            Upper bound on the ``distance`` column of the station table (the
            original comment describes the default as 50 km, so the column is
            presumably in metres).

        Returns
        -------
        The filtered station table, or ``None`` (after printing a message)
        when no station lies within ``search_distance``.
        """
        stations = Stations().nearby(loc.latitude, loc.longitude)
        station = stations.fetch()

        # Keep only the stations inside the search radius.
        station = station[station["distance"] <= search_distance]
        if len(station) == 0:
            # An empty result means the radius excluded everything, i.e. it is too small.
            print("`station` is empty, check the `search_distance` value is not too small")
            return None
        return station

    def get_all_weather_data(self):
        """Download the daily records of every station near ``self.place``.

        Returns
        -------
        list of DataFrame
            One frame per station, in station-table order, with the columns
            ``time``, ``tavg``, ``tmin``, ``tmax`` and ``location``. Rows with
            a missing value in any of the first four columns are dropped. The
            list is empty when the place cannot be geocoded or no station is
            within range.
        """
        loc = self.get_lat_long()
        if loc is None:
            print(f"Could not geocode {self.place!r}")
            return []

        station_id = self.get_nearest_station_id(loc, search_distance=50000)
        if station_id is None:
            return []

        start = self._as_datetime(self.start_date)
        end = self._as_datetime(self.end_date)

        data_list = []
        for station_key in station_id.index.values:
            point_data = Daily(station_key, start, end).fetch().reset_index()
            point_data = (
                point_data[self._VALUE_COLUMNS]
                # Drop (not keep) the rows with a missing time or temperature.
                .dropna(subset=self._VALUE_COLUMNS)
                # assign() returns a new frame, so no write goes to a slice.
                .assign(location=self.place)
            )
            data_list.append(point_data)

        return data_list

    def combine_weather_data(self):
        """Merge the per-station frames into one frame with one row per date.

        Stations are consumed in the order returned by
        ``get_all_weather_data``; a date supplied by an earlier station is not
        taken again from a later one, and dates outside the requested period
        are ignored.

        Returns
        -------
        DataFrame
            The concatenated rows (original row labels kept), or an empty
            frame with the expected columns when there is no data.
        """
        weather_data_list = self.get_all_weather_data()
        if not weather_data_list:
            return pd.DataFrame(columns=self._VALUE_COLUMNS + ['location'])

        # Dates of the requested period that still need a record.
        date_list = pd.date_range(
            start=self._as_datetime(self.start_date),
            end=self._as_datetime(self.end_date),
        )

        selected = []
        for station_frame in weather_data_list:
            # Only take dates that have not been populated yet.
            fresh_rows = station_frame[station_frame['time'].isin(date_list)]
            selected.append(fresh_rows)
            # Remove the dates just covered from the outstanding list.
            date_list = date_list[~date_list.isin(fresh_rows['time'])]

        return pd.concat(selected)

In [None]:
# Pass real datetime objects for the period (the two bounds were previously
# strings in two different formats) and call the method the class actually
# defines, `get_all_weather_data`.
fooClass = FetchDataFoo(place='Alsace, France', start_date=datetime(2000, 1, 1), end_date=datetime(2022, 1, 1))
loc = fooClass.get_lat_long()
foo_weather_data = fooClass.get_all_weather_data()


In [None]:
# Index the first station frame with an element-wise not-NA mask over the listed columns.
# NOTE(review): a boolean-frame mask blanks individual cells rather than removing rows —
# if the intent is to drop incomplete rows, confirm and switch to dropna.
foo_weather_data[0][~foo_weather_data[0][["time","tavg", "tmin", "tmax", "location"]].isna()]

In [None]:
# Manual walk-through of one combine step on the first station frame.
# Start from an empty result frame and the full list of wanted dates.
foo_output_data = pd.DataFrame(columns=['time', 'tavg', 'tmin', "tmax", "location"])
foo_dr = pd.date_range(start='2000-01-01', end ='2022-01-01')

# Keep only the rows whose date is still wanted (this overwrites the list entry in place).
foo_weather_data[0] = foo_weather_data[0][foo_weather_data[0]['time'].isin(foo_dr)]

# Append those rows to the result.
foo_output_data = pd.concat([foo_output_data, foo_weather_data[0]])

# Remove the dates just covered; what is displayed is the set still outstanding.
foo_dr = foo_dr[~foo_dr.isin(foo_weather_data[0]["time"])]
foo_dr

In [None]:
import csv

# Read the wine-regions CSV and collect the first field of every record
with open('data/france-wine-regions.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    # The leading entry is the header, so slice it off straight away
    data = [record[0] for record in reader][1:]

data