# Notebook 02:

# Fetching temperature and humidity data from Wolfram Mathematica:

In [1]:
import pandas as pd
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl, wlexpr

import os
import re
import datetime
import pickle

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# To use Wolfram's kernel we need its path.
# The WOLFRAM_KERNEL_PATH environment variable, when set, overrides the
# machine-specific default below, so the notebook can run on another setup
# without editing this cell.
wolfram_path = os.environ.get('WOLFRAM_KERNEL_PATH', r'H:\wolfram\WolframKernel.exe')

In [3]:
# Drop 'khur va biabanak' and 'SHR KORD' from all_rads.pkl.
# Wolfram Mathematica has no data for these two cities, so no further
# processing can be done on their rows.

all_rads_path = r'..\data\interim\01_reading_data\all_rads.pkl'

with open(all_rads_path, 'rb') as f:
    all_rads = pickle.load(f)

# Filter out one unavailable city at a time.
for unavailable_city in ('khur va biabanak', 'SHR KORD'):
    all_rads = all_rads[all_rads.city != unavailable_city]

# Write the filtered table back to the same pickle file.
with open(all_rads_path, 'wb') as f:
    pickle.dump(all_rads, f)

In [4]:
# Maps each short city code (used for the CSV file names and the DataFrame
# columns) to the city name that is passed to WeatherData.
cities_full_names = {
    'AHV': 'Ahvaz',
    'ARD': 'Ardabil',
    'ARK': 'Arak',
    'BIR': 'Birjand',
    'BJN': 'Bojnurd',
    'BND': 'Bandar Abbas',
    'ESF': 'Isfahan',
    'GRG': 'Gorgan',
    'HMD': 'Hamedan',
    'ILM': 'Ilam',
    'JSK': 'Jask',
    'KRJ': 'karaj',
    'KRM': 'Kerman',
    'KRMS': 'Kermanshah',
    'MSH': 'Mashhad',
    'ORO': 'Urmia',
    'QOM': 'Qom',
    'QZV': 'Qazvin',
    'SHZ': 'Shiraz',
    'SMN': 'Semnan',
    'TBR': 'Tabriz',
    'TEH': 'Tehran',
    'YAS': 'yasouj',
    'yazd': 'Yazd',
    'ZHD': 'Zahedan',
    'ZNJ': 'Zanjan',
}

## Fetch temperature from Mathematica:

In [5]:
# Fetch temperature data by using Wolfram Mathematica,
# and save it in CSVs.

# The session is used as a context manager so that the Wolfram kernel is
# terminated when the loop finishes or raises, instead of being left running.
with WolframLanguageSession(wolfram_path) as session:
    for s_name, f_name in cities_full_names.items():
        # Load data: daily "MeanTemperature" from 1993-01-01 to 2021-12-31.
        api = 'data = WeatherData["{}", "MeanTemperature", {{{{1993, 1, 1}}, {{2021, 12, 31}}, "Day"}}]'.format(f_name)
        session.evaluate(wlexpr(api))
        # Save data. CSV files will be saved in the notebook's directory.
        api = 'Export["{}.csv", data, "CSV"]'.format(s_name)
        session.evaluate(wlexpr(api))

## Load fetched cities temperature data, and combine them:

In [6]:
# Empty DataFrame indexed by every day from 1993-01-01 to 2021-12-31.
# It will hold the temperature values of all cities, one column per city.

start, end = datetime.datetime(1993, 1, 1), datetime.datetime(2021, 12, 31)

index = pd.date_range(start=start, end=end)
temperature_frame = pd.DataFrame(index=index)

In [7]:
# Open each temperature CSV file, read it line by line,
# parse each line, and save the values in a Series.
# Each Series only contains the temperature of a single city.
# Finally each Series is added to the DataFrame that holds all temperature values.

for name in cities_full_names.keys():
    index = []
    values = []

    path = '{}.csv'.format(name)
    with open(path) as f:
        lines = f.readlines()

    for line in lines:

        # Skip blank lines and lines marked as Missing: they have no
        # Quantity[...] value to parse. The skipped days are absent from the
        # Series and therefore end up as NaN in temperature_frame.
        if not line.strip() or 'Missing' in line:
            continue

        # Look for the date in the line: the first three comma-separated
        # numbers inside {...} are used as year, month and day.
        time = re.findall(r'{(.+)}', line)[0]
        year, month, day = (int(i) for i in time.replace(" ", "").split(',')[:3])
        index.append(datetime.datetime(year, month, day))

        # Look for the temperature value in the line.
        value = re.findall(r'Quantity\[(.+),', line)[0]
        values.append(float(value))

    series = pd.Series(values, index)
    temperature_frame[name] = series

In [8]:
# Preview the first five rows of the combined temperature table.
temperature_frame.head(n=5)

Unnamed: 0,AHV,ARD,ARK,BIR,BJN,BND,ESF,GRG,HMD,ILM,...,QOM,QZV,SHZ,SMN,TBR,TEH,YAS,yazd,ZHD,ZNJ
1993-01-01,8.89,-8.33,-15.33,1.39,-3.28,19.11,0.44,4.06,-15.33,2.94,...,0.06,-0.72,4.61,-1.83,-4.94,0.06,0.39,2.83,7.56,-5.39
1993-01-02,9.0,-12.28,3.0,0.78,-3.72,16.78,,4.67,3.0,2.56,...,0.22,6.33,,5.39,-5.28,0.22,,5.61,7.33,5.94
1993-01-03,12.89,-5.17,-4.06,4.94,-1.28,19.28,0.72,8.22,-4.06,3.39,...,2.17,2.22,6.06,19.72,0.39,2.17,2.28,2.33,9.06,4.44
1993-01-04,13.56,-5.83,-3.06,4.78,-2.28,17.06,1.83,6.67,-3.06,4.39,...,0.83,-0.89,5.67,1.72,-2.11,0.83,2.72,0.67,6.94,-2.33
1993-01-05,12.94,-7.0,-2.22,4.39,-1.06,17.0,3.61,5.28,-2.22,4.67,...,1.72,-0.94,5.17,2.39,-2.61,1.72,5.17,2.33,6.67,-3.0


In [9]:
# Count the missing (NaN) values in each city's column.
temperature_frame.isnull().sum()

AHV      98
ARD      68
ARK      58
BIR      55
BJN      55
BND      53
ESF      67
GRG      55
HMD      77
ILM      76
JSK      53
KRJ      55
KRM      65
KRMS     76
MSH      55
ORO      53
QOM      56
QZV      55
SHZ      77
SMN      54
TBR      66
TEH      56
YAS     146
yazd     66
ZHD      80
ZNJ      78
dtype: int64

In [10]:
# Forward-fill missing values with the last known value,
# filling at most two consecutive NaNs per gap.
temperature_frame = temperature_frame.ffill(limit=2)

In [11]:
# Count the NaN values that are still left in each city's column.
temperature_frame.isnull().sum()

AHV     12
ARD      6
ARK      7
BIR      6
BJN      6
BND      6
ESF      8
GRG      6
HMD      8
ILM      7
JSK      6
KRJ      6
KRM      9
KRMS     7
MSH      6
ORO      6
QOM      6
QZV      6
SHZ     10
SMN      6
TBR     10
TEH      6
YAS     29
yazd     9
ZHD      6
ZNJ      7
dtype: int64

In [12]:
# Save temperature_frame to disk as a pickle file.
temperature_pickle_path = r'..\data\external\02_fetching_mathematica\temperature_frame.pkl'
with open(temperature_pickle_path, 'wb') as f:
    pickle.dump(temperature_frame, f)

In [13]:
# Now that all CSV files are loaded, we can delete them.
for name in cities_full_names.keys():
    path = '{}.csv'.format(name)
    # Only remove files that are still there, so re-running this cell
    # does not fail with FileNotFoundError.
    if os.path.exists(path):
        os.remove(path)

## Fetch humidity from Mathematica:

In [14]:
# Fetch humidity data by using Wolfram Mathematica,
# and save it in CSVs.

# The session is used as a context manager so that the Wolfram kernel is
# terminated when the loop finishes or raises, instead of being left running.
with WolframLanguageSession(wolfram_path) as session:
    for s_name, f_name in cities_full_names.items():
        # Load data: daily "MeanHumidity" from 1993-01-01 to 2021-12-31.
        api = 'data = WeatherData["{}", "MeanHumidity", {{{{1993, 1, 1}}, {{2021, 12, 31}}, "Day"}}]'.format(f_name)
        session.evaluate(wlexpr(api))
        # Save data. CSV files will be saved in the notebook's directory.
        api = 'Export["{}.csv", data, "CSV"]'.format(s_name)
        session.evaluate(wlexpr(api))

## Load fetched cities humidity data, and combine them:

In [15]:
# Empty DataFrame indexed by every day from 1993-01-01 to 2021-12-31.
# It will hold the humidity values of all cities, one column per city.

start, end = datetime.datetime(1993, 1, 1), datetime.datetime(2021, 12, 31)

index = pd.date_range(start=start, end=end)
humidity_frame = pd.DataFrame(index=index)

In [16]:
# Open each humidity CSV file and go through it line by line,
# pulling the date and the humidity value out of every usable line.
# The values of one city are collected in a Series, and finally that
# Series is added to the DataFrame that holds all humidity values.

for name in cities_full_names:
    dates, readings = [], []

    csv_path = '{}.csv'.format(name)
    with open(csv_path) as csv_file:
        raw_lines = csv_file.readlines()

    for raw_line in raw_lines:
        # Lines containing 'Missing' are left out entirely.
        if 'Missing' in raw_line:
            continue
        record = raw_line.strip()

        # The date is read from the first three comma-separated numbers
        # inside {...}: year, month, day.
        braces_content = re.findall(r'{(.+)}', record)[0]
        date_parts = [int(part) for part in braces_content.replace(" ", "").split(',')[:3]]
        dates.append(datetime.datetime(date_parts[0], date_parts[1], date_parts[2]))

        # The humidity value is the text that follows ]", on the line.
        reading = re.findall(r']",(.+)', record)[0]
        readings.append(float(reading))

    humidity_frame[name] = pd.Series(readings, dates)

In [17]:
# Preview the first five rows of the combined humidity table.
humidity_frame.head(n=5)

Unnamed: 0,AHV,ARD,ARK,BIR,BJN,BND,ESF,GRG,HMD,ILM,...,QOM,QZV,SHZ,SMN,TBR,TEH,YAS,yazd,ZHD,ZNJ
1993-01-01,0.725,0.765,0.915,0.673,0.78,0.782,0.462,0.764,0.915,0.554,...,0.618,0.582,0.602,0.722,0.655,0.618,0.533,0.445,0.791,0.599
1993-01-02,0.784,0.862,0.723,0.609,0.75,0.883,,0.854,0.723,0.562,...,0.69,0.926,,0.772,0.737,0.69,,0.707,0.681,0.856
1993-01-03,0.896,0.983,0.927,0.5,0.693,0.665,0.879,0.667,0.927,0.86,...,0.636,0.637,0.943,0.164,0.706,0.636,0.946,0.659,0.533,0.922
1993-01-04,0.929,0.974,0.932,0.822,0.936,0.905,0.931,0.837,0.932,0.818,...,0.897,0.956,0.943,0.869,0.84,0.897,0.819,0.96,0.608,0.718
1993-01-05,0.893,0.941,0.887,0.907,0.877,0.945,0.788,0.911,0.887,0.702,...,0.852,0.891,0.907,0.704,0.711,0.852,0.907,0.916,0.922,0.729


In [18]:
# Count the missing (NaN) values in each city's column.
humidity_frame.isnull().sum()

AHV     108
ARD     134
ARK      80
BIR      74
BJN      84
BND      62
ESF      91
GRG     101
HMD     100
ILM     139
JSK      75
KRJ      61
KRM      84
KRMS     99
MSH      65
ORO      63
QOM      77
QZV      93
SHZ      97
SMN     102
TBR      76
TEH      61
YAS     232
yazd     88
ZHD     101
ZNJ      85
dtype: int64

In [19]:
# Forward-fill missing values with the last known value,
# filling at most two consecutive NaNs per gap.
humidity_frame = humidity_frame.ffill(limit=2)

In [20]:
# Count the NaN values that are still left in each city's column.
humidity_frame.isnull().sum()

AHV     12
ARD      7
ARK      8
BIR      6
BJN      6
BND      6
ESF      9
GRG      6
HMD      8
ILM      8
JSK      6
KRJ      6
KRM     10
KRMS     8
MSH      6
ORO      6
QOM      6
QZV      6
SHZ     10
SMN      6
TBR     12
TEH      6
YAS     34
yazd     9
ZHD      7
ZNJ      7
dtype: int64

In [21]:
# Save humidity_frame to disk as a pickle file.
humidity_pickle_path = r'..\data\external\02_fetching_mathematica\humidity_frame.pkl'
with open(humidity_pickle_path, 'wb') as f:
    pickle.dump(humidity_frame, f)

In [22]:
# Now that all CSV files are loaded, we can delete them.
for name in cities_full_names.keys():
    path = '{}.csv'.format(name)
    # Only remove files that are still there, so re-running this cell
    # does not fail with FileNotFoundError.
    if os.path.exists(path):
        os.remove(path)

## Conclusion:
In this notebook:

1. Temperature and humidity data of cities are fetched from Wolfram Mathematica.
2. Fetched temperature and humidity data are loaded and combined into\
    two DataFrames (temperature_frame, humidity_frame).
3. Finally, temperature_frame and humidity_frame get pickled.