# Imports

In [None]:
import csv
import json
import datetime
import os

from collections import OrderedDict
from pprint import pprint as pp

In [None]:
import arcpy
import numpy as np
import pandas as pd
import pickle
import requests
import seaborn as sns
import matplotlib.pyplot as plt

from dateutil import parser
from netCDF4 import Dataset, netcdftime
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA

# XGEO

In [None]:
# When True, xgeo_create_false_data copies the 'sol_rad' and 'sin' attributes
# from the original point onto the synthetic points it creates.
sol_and_sin = True
# URL template for the NVE chart server. xgeo_get_values fills in {time},
# {id} and {url_id} with str.format before issuing the request.
base_url = 'http://h-web01.nve.no/chartserver/ShowData.aspx?req=getchart&ver=1.0&vfmt=json&time={time}&chd=ds=hgts,id={id};{url_id}'
# Pairs of (value substituted for {url_id}, name used as the key prefix on a
# point). The names are Norwegian: temperature change, temperature,
# precipitation, evaporation, soil water saturation.
FIELDS = [
    ('tmgr', 'temperaturendring'),
    ('tm', 'temperatur'),
    ('rr', 'nedbør'),
    ('gwb_eva', 'fordampning'),
    ('gwb_sssrel', 'vannmetning_i_jord'),
]

In [None]:
def xgeo_get_id(point: dict) -> str:
    """Return the point's coordinates as 'x;y', the id format used in base_url."""
    x_coord, y_coord = point['x'], point['y']
    return f"{x_coord};{y_coord}"


def xgeo_get_time(point: dict, days_back: int=2) -> str:
    """
    Return the 'from;to' time range used in base_url.

    'to' is the date parsed from point['time'] and 'from' lies days_back days
    before it; both are formatted as YYYYMMDDT0000.
    """
    span = datetime.timedelta(days=days_back)
    stamp_format = '%Y%m%dT0000'
    end = parser.parse(point['time'], dayfirst=False)
    start = end - span
    return ';'.join((start.strftime(stamp_format), end.strftime(stamp_format)))


def xgeo_get_values(point: dict) -> "dict | bool":
    """
    Fetch the xgeo series for every entry in FIELDS and store it on the point.

    One request per (field_id, field_name) pair is made against base_url for
    the time range returned by xgeo_get_time(point). The first three entries
    of the returned series are stored on the point as:

        '<field_name>_days_back_0'  -> series entry 2
        '<field_name>_days_back_1'  -> series entry 1
        '<field_name>_days_back_2'  -> series entry 0

    Returns the same point object with the new keys added, or False when any
    series has fewer than three entries or one of the three values is None.
    The point is only modified when every field was fetched successfully.

    NOTE(review): the request is made without a timeout, so an unresponsive
    server blocks this call indefinitely.
    """
    time_range = xgeo_get_time(point)
    point_id = xgeo_get_id(point)

    # Stage the values so a failure half-way through FIELDS does not leave
    # the caller's point partially filled.
    collected = {}
    for field_id, field_name in FIELDS:
        url = base_url.format(time=time_range, id=point_id, url_id=field_id)
        series = requests.get(url).json()[0]['SeriesPoints']

        # Three entries are indexed below; a shorter series previously
        # raised IndexError instead of reporting failure.
        if len(series) < 3:
            return False

        values = [entry['Value'] for entry in series[:3]]
        # A missing value in any position would end up as None in the
        # feature matrix, so reject the point rather than only checking
        # the last entry.
        if any(value is None for value in values):
            return False

        collected[f'{field_name}_days_back_0'] = values[2]
        collected[f'{field_name}_days_back_1'] = values[1]
        collected[f'{field_name}_days_back_2'] = values[0]

    point.update(collected)
    return point


def xgeo_create_false_data(point: dict) -> list:
    """
    Build synthetic negative samples for a point.

    Two extra points are created at the same location, one 10 days before and
    one 10 days after the original point's time, labelled solslyng=False. When
    the module-level flag sol_and_sin is set, 'sol_rad' and 'sin' are copied
    from the original point. Each new point is passed through xgeo_get_values.

    Returns a list with the new points for which xgeo data could be collected.
    """
    negatives = []
    for sign in (-1, 1):
        offset = datetime.timedelta(days=(10 * sign))
        candidate = OrderedDict([
            ('x', point['x']),
            ('y', point['y']),
            ('FID', point['FID'] + f"_{sign}"),
            ('time', (parser.parse(point['time'], dayfirst=False) + offset).strftime('%Y-%m-%d 00:00:00')),
            ('solslyng', False),
        ])
        if sol_and_sin:
            candidate['sol_rad'] = point['sol_rad']
            candidate['sin'] = point['sin']

        fetched = xgeo_get_values(candidate)
        if not fetched:
            continue
        negatives.append(fetched)
    return negatives


def xgeo_create_points_from_csv(path_to_csv):
    """
    Read labelled points from a ';'-separated CSV and attach xgeo data.

    Columns read from each row: 0 = x, 1 = y, 2 = FID, 3 = date (parsed
    day-first), 4 = sol_rad, 5 = sin. Decimal commas are converted to decimal
    points. The header row (first cell 'XCoord') and blank rows are skipped.

    Every row is labelled solslyng=True; the label is not read from the file.
    For each point that xgeo_get_values accepts, the point itself and the
    synthetic negatives from xgeo_create_false_data are appended.

    Returns the list of points; rows for which xgeo_get_values returns False
    are dropped.
    """
    points = []
    with open(path_to_csv, newline='') as f:
        for row in csv.reader(f, delimiter=';'):
            # csv.reader yields an empty list for blank lines; indexing it
            # would raise IndexError.
            if not row or row[0] == 'XCoord':
                continue

            point = OrderedDict()
            point['x'] = row[0].replace(',', '.')
            point['y'] = row[1].replace(',', '.')
            point['FID'] = row[2]
            point['time'] = parser.parse(row[3], dayfirst=True).strftime('%Y-%m-%d 00:00:00')
            point['solslyng'] = True
            point['sol_rad'] = float(row[4].replace(',', '.'))
            point['sin'] = float(row[5].replace(',', '.'))

            point = xgeo_get_values(point)
            if point is False:
                continue

            points.append(point)
            if point['solslyng']:
                points.extend(xgeo_create_false_data(point))
    return points


def xgeo_create_x_y(points: list):
    """
    Split points into a feature matrix X and a label vector Y for the SVC.

    For every point the values are taken in insertion order: the value at
    position 4 (solslyng) becomes the integer label and everything from
    position 5 onwards becomes the feature row.
    """
    rows = [list(entry.values()) for entry in points]
    features = [row[5:] for row in rows]
    labels = [int(row[4]) for row in rows]
    return np.array(features), np.array(labels)

In [None]:
# Absolute, machine-specific path to the ';'-separated CSV with the labelled
# training points.
path_to_csv =  r'C:\Users\student\Desktop\python\xgeo\solslyng_p_mb_m_s_r.csv'
# Builds the training points; xgeo_get_values issues HTTP requests for every
# row, so this cell depends on network access.
p = xgeo_create_points_from_csv(path_to_csv)

In [None]:
# Feature matrix and label vector built from the training points.
xgeo_X, xgeo_y = xgeo_create_x_y(p)

# SVC


In [None]:
def train_svc(X, y, cv=10, n_jobs=7):
    """
    Grid-search an RBF-kernel SVC on standardized features.

    The pipeline is StandardScaler followed by SVC. C and gamma are each
    searched over np.logspace(-4, 4); kernel, random_state and probability
    are fixed through single-value grid entries. Candidates are scored by
    accuracy.

    Parameters:
        X: feature matrix passed to GridSearchCV.fit.
        y: label vector passed to GridSearchCV.fit.
        cv: cross-validation setting forwarded to GridSearchCV (default 10,
            the previously hard-coded value).
        n_jobs: number of parallel jobs forwarded to GridSearchCV (default 7,
            the previously hard-coded value).

    Returns the fitted GridSearchCV object.
    """
    pipe = make_pipeline(StandardScaler(), SVC())
    param_grid = [
        {
            'svc__C': np.logspace(-4, 4),
            'svc__gamma': np.logspace(-4, 4),
            'svc__kernel': ['rbf'],
            'svc__random_state': [1],
            # probability=True is needed for predict_proba on the result.
            'svc__probability': [True],
        }
    ]
    search = GridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        scoring='accuracy',
        cv=cv,
        n_jobs=n_jobs,
    )
    return search.fit(X, y)

# RUN

In [None]:
# Run the grid search on the xgeo training data; gs is the fitted
# GridSearchCV returned by train_svc.
gs = train_svc(xgeo_X, xgeo_y)

In [None]:
# Report the best cross-validated score (train_svc scores by accuracy) and
# the parameter combination that achieved it.
print(gs.best_score_)
print(gs.best_params_)

# SNS

In [None]:
# One row per sample: the feature columns followed by the label as the last
# column. The rows of xgeo_X are numpy arrays, so `row + [label]` would add
# the label to every feature value (broadcasting) instead of appending it.
df = pd.DataFrame(np.column_stack([xgeo_X, xgeo_y]))
# Absolute pairwise correlation between all columns, label included.
corr = df.corr().abs()

In [None]:
# NOTE(review): nothing is drawn in the visible code before this call, and
# neither corr nor seaborn is used anywhere visible — presumably a heatmap of
# corr was intended here; confirm.
plt.show()

# Predict


In [None]:
# Read points from a CSV and create prediction points for a given date
def xgeo_create_predict_points_from_csv(path_to_csv, predict_date):
    """
    Read unlabelled points from a ';'-separated CSV and attach xgeo data.

    Columns read from each row: 0 = x, 1 = y, 2 = FID, 3 = sol_rad, 4 = sin.
    Decimal commas are converted to decimal points. Every point gets the same
    time, parsed day-first from predict_date, and solslyng=None. The header
    row (first cell 'XCoord') and blank rows are skipped.

    The row index is printed for every row as a progress indicator. Rows that
    fail to parse or whose xgeo lookup raises are skipped (best effort), as
    are rows for which xgeo_get_values returns False.

    Raises whatever dateutil's parser raises when predict_date cannot be
    parsed; previously that error was swallowed once per row and an empty
    list was returned.

    Returns the list of points that were successfully enriched.
    """
    # The date is the same for every row, so parse it once up front.
    predict_time = parser.parse(predict_date, dayfirst=True).strftime('%Y-%m-%d 00:00:00')

    points = []
    with open(path_to_csv, newline='') as f:
        for i, row in enumerate(csv.reader(f, delimiter=';')):
            print(i)
            # csv.reader yields an empty list for blank lines; indexing it
            # would raise IndexError.
            if not row or row[0] == 'XCoord':
                continue
            try:
                point = OrderedDict()
                point['x'] = row[0].replace(',', '.')
                point['y'] = row[1].replace(',', '.')
                point['FID'] = row[2]
                point['time'] = predict_time
                point['solslyng'] = None
                point['sol_rad'] = float(row[3].replace(',', '.'))
                point['sin'] = float(row[4].replace(',', '.'))
                point = xgeo_get_values(point)
            except Exception:
                # Skip bad rows, but unlike a bare except this lets
                # KeyboardInterrupt/SystemExit stop the long-running loop.
                continue
            if point is not False:
                points.append(point)
    return points


def xgeo_create_X(points: list):
    """
    Build the feature matrix X used for prediction.

    For every point the values are taken in insertion order and everything
    from position 5 onwards becomes the feature row.
    """
    feature_rows = [list(entry.values())[5:] for entry in points]
    return np.array(feature_rows)

In [None]:
# Set the prediction date; it is parsed day-first (DD.MM.YYYY) by
# xgeo_create_predict_points_from_csv.
date_str = '18.07.2015'  # SET
# Absolute, machine-specific path to the CSV with the points to predict for.
path_to_predict_csv_hele_norge = r"C:\Users\student\Desktop\python\ml\Hele_norge.csv"
# Builds the prediction points; issues HTTP requests for every row and prints
# the row index as it goes.
predict_points_hele = xgeo_create_predict_points_from_csv(path_to_predict_csv_hele_norge, date_str)

In [None]:
# Feature matrix for the prediction points.
pred_X = xgeo_create_X(predict_points_hele)

In [None]:
# NOTE(review): p_gs is not defined anywhere in the visible code; the grid
# search trained in this notebook is named gs. Presumably p_gs is a model
# loaded or trained in a cell not shown — confirm before running.
pred_y_ = p_gs.predict_proba(pred_X)

In [None]:
# Write one CSV row per prediction point: x, y and the value in column 1 of
# the predict_proba output for that point.
# NOTE(review): the file name contains 11072015 while date_str is set to
# '18.07.2015' — confirm the output name matches the predicted date.
# newline='' is required by the csv module; without it every row is followed
# by a blank line on Windows.
with open('xgeo/proba_hele_11072015.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    for x_y, proba in zip([list(p.values()) for p in predict_points_hele], pred_y_):
        writer.writerow([x_y[0], x_y[1], proba[1]])
