<a href="https://colab.research.google.com/github/karoldem/travel-season/blob/main/travel_season.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy, scipy.optimize

def fit_sin(tt, yy):
    '''Least-squares fit of ``A*sin(w*t + p) + c`` to samples ``yy`` taken at times ``tt``.

    All four parameters (amplitude, angular frequency, phase, offset) are
    fitted.  The starting frequency is taken from the strongest non-zero FFT
    bin, with the sample spacing read from the first two entries of ``tt``
    (so ``tt`` needs at least two, uniformly spaced, entries).

    Returns a dict with keys "amp", "omega", "phase", "offset", "freq",
    "period", "fitfunc" (callable evaluating the fitted curve), "maxcov"
    (largest entry of the covariance matrix) and "rawres"
    (``(initial_guess, popt, pcov)``).

    Note: a later cell in this notebook defines another ``fit_sin`` with a
    fixed angular frequency; whichever cell ran last is the one in effect.
    '''
    times = numpy.array(tt)
    values = numpy.array(yy)

    # Frequency seed: location of the largest spectral peak, skipping bin 0
    # (the zero-frequency bin only reflects the constant offset).
    sample_step = times[1] - times[0]   # assume uniform spacing
    freq_bins = numpy.fft.fftfreq(len(times), sample_step)
    magnitudes = abs(numpy.fft.fft(values))
    peak_bin = numpy.argmax(magnitudes[1:]) + 1

    # Initial guess, ordered as [amplitude, angular frequency, phase, offset].
    guess = numpy.array([
        numpy.std(values) * 2.**0.5,
        2.*numpy.pi*abs(freq_bins[peak_bin]),
        0.,
        numpy.mean(values),
    ])

    def sinfunc(t, amplitude, ang_freq, shift, level):
        return amplitude * numpy.sin(ang_freq*t + shift) + level

    popt, pcov = scipy.optimize.curve_fit(sinfunc, times, values, p0=guess)
    A, w, p, c = popt
    f = w/(2.*numpy.pi)

    def fitfunc(t):
        return A * numpy.sin(w*t + p) + c

    result = {}
    result["amp"] = A
    result["omega"] = w
    result["phase"] = p
    result["offset"] = c
    result["freq"] = f
    result["period"] = 1./f
    result["fitfunc"] = fitfunc
    result["maxcov"] = numpy.max(pcov)
    result["rawres"] = (guess, popt, pcov)
    return result

In [2]:
import numpy, scipy.optimize

def fit_sin(tt, yy, omega=0.5235987755982988):
    '''Fit ``A*sin(omega*t + p) + c`` to the input sequence with a FIXED angular frequency.

    Only amplitude, phase and offset are fitted; ``omega`` is held constant.
    The default, 0.5235987755982988, is 2*pi/12, i.e. one full cycle every
    12 time units.

    Note: this definition has the same name as the free-frequency ``fit_sin``
    in an earlier cell of this notebook; whichever cell ran last is the one
    in effect.

    Parameters:
        tt    -- sequence of sample times
        yy    -- sequence of sample values, same length as ``tt``
        omega -- angular frequency to hold fixed during the fit

    Returns a dict with keys "amp", "omega", "phase", "offset", "freq",
    "period", "fitfunc" (callable evaluating the fitted curve), "maxcov"
    (largest entry of the covariance matrix) and "rawres"
    (``(initial_guess, popt, pcov)``).
    '''
    tt = numpy.array(tt)
    yy = numpy.array(yy)

    # Initial guess, ordered as [amplitude, phase, offset].
    guess_amp = numpy.std(yy) * 2.**0.5
    guess_offset = numpy.mean(yy)
    guess = numpy.array([guess_amp, 0., guess_offset])

    def sinfunc(t, A, p, c):
        return A * numpy.sin(omega*t + p) + c

    popt, pcov = scipy.optimize.curve_fit(sinfunc, tt, yy, p0=guess)
    A, p, c = popt

    f = omega/(2.*numpy.pi)
    # A zero frequency has no finite period; avoid ZeroDivisionError.
    period = 1./f if f else float("inf")

    def fitfunc(t):
        return A * numpy.sin(omega*t + p) + c

    return {"amp": A, "omega": omega, "phase": p, "offset": c, "freq": f, "period": period,
            "fitfunc": fitfunc, "maxcov": numpy.max(pcov), "rawres": (guess, popt, pcov)}

In [3]:
from re import findall

_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')


def _parse_month_cell(cell):
    '''Convert one monthly table cell (number or text such as "−1.8(28.8)") to a float.'''
    if not isinstance(cell, str):
        # Numeric cells (Python or numpy ints/floats, including NaN) convert directly.
        return float(cell)

    # U+2212 MINUS SIGN is not accepted by float(); normalise it to an ASCII hyphen.
    text = cell.replace('−', '-') #No, they are not the same
    # Require at least one digit so a lone "-" or "." is never picked up as a number.
    numbers = findall(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)", text)
    if not numbers:
        raise ValueError("No numeric value found in %r" % (cell,))
    # Only the first number is used; anything after it (e.g. a parenthesised value) is ignored.
    return float(numbers[0])


def inject(data, dataframe, variable, fitter=None):
    '''Fit a sinusoid to every row of a monthly table and store the result in ``data``.

    Parameters:
        data      -- dict mutated in place; keyed by ``(City, Country)`` tuples,
                     each value being a dict ``{variable: summary}``
        dataframe -- table with columns 'Jan'..'Dec', 'City' and 'Country'
        variable  -- name under which this table's summary is stored
        fitter    -- optional callable ``fitter(tt, yy)`` returning a mapping with
                     keys 'amp', 'offset' and 'phase'; defaults to whichever
                     ``fit_sin`` is currently defined in the notebook namespace

    For each row the twelve monthly cells are converted to floats, fitted
    against the time axis 0..11, and summarised as
    ``{'amplitude': ..., 'average': ..., 'phase': ...}``.

    Returns the string "Not a valid dataframe" (leaving ``data`` untouched)
    when a required column is missing; otherwise returns None.
    Raises ValueError if a text cell contains no number.
    '''
    required = set(_MONTHS) | {'City', 'Country'}
    if not required.issubset(dataframe):
        return "Not a valid dataframe"

    if fitter is None:
        fitter = fit_sin

    time_axis = list(range(len(_MONTHS)))

    for i in dataframe.index:
        row = dataframe.loc[i]   # fetch the row once instead of once per column

        readings = [_parse_month_cell(row[month]) for month in _MONTHS]
        fit = fitter(time_axis, readings)

        summary = {
            'amplitude': fit['amp'],
            'average': fit['offset'],
            'phase': fit['phase'],
        }

        key = (row['City'], row['Country'])
        # Keep summaries already stored for this city under other variable names.
        data.setdefault(key, {})[variable] = summary

In [4]:
import io

import pandas as pd
import requests

# Scrape the Wikipedia climate tables and build the per-city summary.
#   data -- {(City, Country): {variable: fit summary}}, filled in by inject()
#   k    -- every table parsed from the pages, in download order, including
#           tables that inject() rejects for lacking the expected columns
data = {}
k = []

urls = [{'variable' : 'temperature', 'url' : 'https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature'},
        {'variable' : 'sunshine', 'url' : 'https://en.wikipedia.org/wiki/List_of_cities_by_sunshine_duration'}]

for url in urls:
    # A timeout stops the cell hanging forever on a stalled connection.
    response = requests.get(url['url'], timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to the HTML parser.
    response.raise_for_status()
    html = response.content
    # Wrap the bytes in a file-like object: passing literal HTML to read_html is deprecated.
    df_list = pd.read_html(io.BytesIO(html))
    for i in df_list:
        inject(data, i, url['variable'])
        k.append(i)



In [15]:
# Scratch check of the cell-parsing logic on a single city.
# This lookup is not displayed (only a cell's last expression is echoed),
# but it still raises KeyError if the entry is missing.
data[('Warsaw', 'Poland')]

# k[2] is the third table collected by the scraping cell; the position is
# presumably tied to the current page layout -- verify if the pages change.
record = k[2].loc[lambda df: df['City'] == "Warsaw"]

blah = []
for month in ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']:

    print(record[month])

    # Parse the cell itself rather than the printed Series text, so the result
    # does not depend on the row label appearing as the first number.
    cell = record[month].iloc[0]

    if isinstance(cell, str):
        text = cell.replace('−', '-') #No, they are not the same
        # `findall` comes from the `from re import findall` in the inject cell.
        numbers = findall(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)", text)
        blah.append(float(numbers[0]))

    else:
        # Numeric cells need no text parsing.
        blah.append(float(cell))

blah

47    −1.8(28.8)
Name: Jan, dtype: object
47    −0.6(30.9)
Name: Feb, dtype: object
47    2.8(37.0)
Name: Mar, dtype: object
47    8.7(47.7)
Name: Apr, dtype: object
47    14.2(57.6)
Name: May, dtype: object
47    17.0(62.6)
Name: Jun, dtype: object
47    19.2(66.6)
Name: Jul, dtype: object
47    18.3(64.9)
Name: Aug, dtype: object
47    13.5(56.3)
Name: Sep, dtype: object
47    8.5(47.3)
Name: Oct, dtype: object
47    3.3(37.9)
Name: Nov, dtype: object
47    −0.7(30.7)
Name: Dec, dtype: object


[-1.8, -0.6, 2.8, 8.7, 14.2, 17.0, 19.2, 18.3, 13.5, 8.5, 3.3, -0.7]

In [16]:
# Hard-coded copy of the twelve values shown for `blah` in the previous cell's
# output, in Jan..Dec order.
blablah = [
    -1.8, -0.6, 2.8, 8.7, 14.2, 17.0,   # Jan-Jun
    19.2, 18.3, 13.5, 8.5, 3.3, -0.7,   # Jul-Dec
]