# Challenge Solaire

## Imports

In [None]:
import pandas as pd
import wget as wg
import numpy as np
import datetime
import os
import sklearn as sk
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt


## Téléchargement des données

In [None]:
# Download the daily SURFRAD files of the Boulder_CO station, one file per
# day of year (named tblYYDDD.dat), into the current working directory.
url = "https://gml.noaa.gov/aftp/data/radiation/surfrad/Boulder_CO/"
years = ["2019", "2020", "2021", "2022"]
for year in years:
    # Number of days in this year: a fixed range(1, 366) stops at day 365
    # and therefore never requests day 366 of a leap year such as 2020.
    days_in_year = (
        datetime.date(int(year) + 1, 1, 1) - datetime.date(int(year), 1, 1)
    ).days
    days = [f'{i:03d}' for i in range(1, days_in_year + 1)]
    for day in days:
        filename = f"tbl{year[2:]}{day}.dat"
        # Do not fetch a file again when the cell is re-run.
        if os.path.exists(filename):
            continue
        try:
            wg.download(f"{url}{year}/{filename}")
        except Exception as e:
            # Best effort: one failed day must not abort the whole download.
            print(e)

## Construire des données propres

In [None]:
def convert(x: str) -> float:
    """Parse a ``"<value> <indicator>"`` string into a float.

    The text is split on a single space and must yield exactly two fields
    (otherwise the unpacking raises ``ValueError``). When the indicator
    field equals ``"1"`` the result is NaN; in every other case the value
    field is converted with ``float``.
    """
    value, indicator = x.split(" ")
    return np.nan if indicator == "1" else float(value)
    
def convert_to_nan(x: str) -> float:
    """Return NaN when ``x`` is truthy, otherwise ``float(x)``.

    ``get_clean_df`` adds the result to a value column, so a truthy flag
    turns the value into NaN while a falsy flag (e.g. ``0``) adds ``0.0``
    and leaves it unchanged.

    NOTE(review): the parameter is annotated ``str``, but every non-empty
    string (including ``"0"``) is truthy and yields NaN, and the empty
    string makes ``float`` raise; presumably the function is only called
    with numeric flags -- confirm.
    """
    return float(x) if not x else np.nan
def get_time(df: pd.DataFrame) -> pd.Series:
    """Placeholder that is not implemented: it always returns ``None``.

    NOTE(review): the annotation announces a ``pd.Series``, but ``df`` is
    never read and nothing is computed.
    """
    return None

In [None]:
def convert_date(year, year_day, month, day, hour, minute):
    """Build a naive ``datetime.datetime`` from calendar fields.

    ``year_day`` is accepted so that callers can unpack a six-field record
    straight into this function, but it does not take part in the result.
    """
    return datetime.datetime(
        year=year,
        month=month,
        day=day,
        hour=hour,
        minute=minute,
    )

In [None]:
# Column labels applied by get_clean_df, in order: the timestamp, the single
# column kept as is, then one label per (value, flag) pair.
col_names = [
    "Date",
    "zen",
    "dw_solar",
    "uw_solar",
    "direct_n",
    "diffuse",
    "dw_ir",
    "dw_casetemp",
    "dw_dometemp",
    "uw_ir",
    "uw_casetemp",
    "uw_dometemp",
    "uvb",
    "par",
    "netsolar",
    "netir",
    "totalnet",
    "temp",
    "rh",
    "windspd",
    "winddir",
    "pressure",
]


In [None]:
def get_clean_df(path: str):
    """Load one data file and return it as a cleaned, date-indexed DataFrame.

    The first two lines of the file are skipped and the remainder is parsed
    as whitespace-separated columns that are addressed by position only:

    * columns 0-5 are converted to ``int`` and passed to ``convert_date``
      to build the timestamp of each row;
    * column 7 is kept as is;
    * from column 8 onwards, columns are consumed in (value, flag) pairs and
      ``convert_to_nan`` of the flag is added to the value, which turns the
      value into NaN whenever the flag is truthy.

    The resulting columns are labelled with ``col_names``, ``"Date"``
    becomes the index, and every row containing a NaN is dropped.

    Parameters
    ----------
    path : str
        Path of the file to read. It is printed once the file has been
        processed, as a progress trace.

    Returns
    -------
    pandas.DataFrame
        The cleaned data, indexed by ``"Date"``.
    """
    # header=None: without it pandas takes the first non-skipped line as
    # column labels, so that record never reaches the data.
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(path, skiprows=2, sep=r"\s+", header=None)

    # Iterate over the six date columns once instead of slicing the frame
    # with df.iloc[i, 0:6] for every single row.
    dates = pd.Series(
        [
            convert_date(*(int(field) for field in row))
            for row in df.iloc[:, 0:6].itertuples(index=False, name=None)
        ]
    )

    columns = [dates, df.iloc[:, 7]]
    for i in range(8, len(df.columns), 2):
        # value + 0.0 keeps the value, value + NaN masks it.
        columns.append(df.iloc[:, i] + df.iloc[:, i + 1].map(convert_to_nan))

    df_final = pd.concat(columns, axis=1)
    df_final.columns = col_names
    df_final.set_index("Date", inplace=True)
    df_final.dropna(how="any", inplace=True)
    print(path)
    return df_final

In [None]:
# Clean every file of the data directory whose name ends with the "dat"
# extension and stack the resulting frames into one DataFrame.
path = r"C:\Users\XAVIER\Documents\Data Sophia\Challenge_Solaire"
files = list(
    filter(lambda name: name.rsplit(".", 1)[-1] == "dat", os.listdir(path))
)
df = pd.concat([get_clean_df(f"{path}\\{el}") for el in files])


In [None]:
# Average the records over one-hour bins and save the result as CSV.
df_final = df.dropna(how="any")
# Lowercase "s": the uppercase "S" frequency alias is deprecated in recent
# pandas releases; the bin width (3600 seconds) is unchanged.
df_final = df_final.resample("3600s").mean()
# Move every bin label back by one hour, in one vectorised operation.
# NOTE(review): resample labels a bin by its left edge by default, so after
# this shift a label lies one hour before the start of the data it averages.
# Confirm this matches the mcclear timestamps used later in this file, which
# are taken from the part after "/" in "Observation period".
df_final.index = df_final.index - datetime.timedelta(hours=1)
df_final.to_csv("cleaned_data.csv")


## Analyse de données

TODO : ajouter des graphiques (courbes et nuages de points).

In [None]:
# Reload the data saved in "cleaned_data.csv", parse the "Date" text column
# back into datetimes and use it as the index.
df_cleaned = pd.read_csv("cleaned_data.csv")
df_cleaned["Date"] = df_cleaned["Date"].apply(
    lambda stamp: datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
)
df_cleaned = df_cleaned.set_index("Date")
df_cleaned

In [None]:
# Matrix of pairwise scatter plots over all columns of df_cleaned.
# The trailing ";" is kept: presumably it suppresses the textual output of
# the call in the notebook.
scatter_matrix(
    df_cleaned,
    alpha=0.5,
    s=60,
    figsize=(20, 20),
);

## Ecart au solaire

In [None]:
# Load "mcclear.csv": the first 37 lines are skipped and fields are
# separated by ";".
df_mcclear = pd.read_csv("mcclear.csv", sep=";", skiprows=37, encoding="utf8")
# "Observation period" holds "/"-separated text; only the part after the
# last "/" is parsed and used as the timestamp of the row.
df_mcclear["Observation period"] = df_mcclear["Observation period"].apply(
    lambda period: datetime.datetime.strptime(
        period.rsplit("/", 1)[-1], "%Y-%m-%dT%H:%M:%S.%f"
    )
)
df_mcclear = df_mcclear.set_index("Observation period")
df_mcclear.head(30)

TOA: Irradiation on horizontal plane at the top of atmosphere (Wh/m2) computed from Solar Geometry 2

Clear sky GHI: Clear sky global irradiation on horizontal plane at ground level (Wh/m2)

Clear sky BHI: Clear sky beam irradiation on horizontal plane at ground level (Wh/m2)

Clear sky DHI: Clear sky diffuse irradiation on horizontal plane at ground level (Wh/m2)

Clear sky BNI: Clear sky beam irradiation on mobile plane following the sun at normal incidence (Wh/m2)

In [None]:
# Put the measurements and the mcclear columns side by side, aligned on
# their datetime indexes, and keep only the rows complete in both.
df_global = pd.concat((df_cleaned, df_mcclear), axis="columns").dropna(
    axis="index", how="any"
)
df_global

In [None]:
# Keep only the rows whose TOA is at least 50 (presumably to avoid dividing
# by near-zero values -- confirm). .copy() makes the filtered frame
# independent, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
df_global = df_global[df_global['TOA'] >= 50].copy()
# Ratio of the measured 'dw_solar' to the top-of-atmosphere value.
# NOTE(review): despite its name, 'cor_uvb' is computed from 'dw_solar',
# not from the 'uvb' column.
df_global['cor_uvb'] = df_global['dw_solar'] / df_global['TOA']
plt.plot(df_global['dw_solar'])
plt.show()
plt.plot(df_global['cor_uvb'])
plt.show()

## Création des jeux de test et d'apprentissage

## Prédiction

## Conclusion