# <font color='orange'><center>Typical Meteorological Year Creation for the North of the Mediterranean Sea</center></font>

In [2]:
# Importing dependencies
import pandas as pd
import glob
import os
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

|Case Study Location: North of Mediterranean Sea|
|-----------------------------|
|<p align='left'>**Period:** 2000 : 2015</p>|  
|<p align='left'>__Time step:__ 1 hour</p>|
|<p align='left'>**Points:** 60</p>|
|<p align='left'>**Latitude:** 40.5 : 43</p>|
|<p align='left'>__Longitude:__ 3.5 : 8</p>|
|<p align='left'>__Spatial step:__ 0.5º</p>|

## <font color='purple'> Reading files and initialising variables</font>

In [3]:
# Folder holding the raw time series, one .csv file per geographical location
path = r'C:\Users\Cris\Documents\Projects\Time series MERRA 2\MEDITERRANEAN SEA NORTH' # use your path

# All .csv files to be analysed. glob returns its matches in arbitrary order, so the
# list is sorted to keep the "Point N" numbering used later reproducible between runs.
all_files = sorted(glob.glob(os.path.join(path, '*.csv')))

# Failing here is clearer than the error raised much later when the accumulated
# weighted sum is still the integer 0, and it avoids truncating the output text file.
if not all_files:
    raise FileNotFoundError('No .csv files found in {}'.format(path))

# Folder used to store the files with the Weighted Sum for each geographical location
path2 = r'C:\Users\Cris\Documents\Projects\Energy_resource\Mediterranean_Sea' + \
r'\Mediterranean_Sea_North\NORTH_WS_each_point'

# Making sure the output folder exists before any result is written into it
os.makedirs(path2, exist_ok=True)

# One dataframe per input file; the first 25 lines of each file are skipped
# (presumably a metadata preamble - TODO confirm against the data files)
li = [
    pd.read_csv(filename, skiprows=25, delimiter=';', index_col=None, header=None)
    for filename in all_files
]

# Accumulator for the total Weighted Sum over all geographical locations,
# and list collecting the TMY (best year per month) of every location
W_total = 0
TMY_values_list = []

# Text file receiving the best Typical Meteorological Year of each geographical location.
# It stays open across the following cells and is closed by the last one.
f = open('Typical_Meteorological_Year_MS_North.txt', 'w')

## <font color='purple'> Cleaning the dataframe and calculating the Weighted Sum for each location</font>

In [4]:
# ---------------------------------------------------------------------------
# Settings and helpers shared by every geographical location
# ---------------------------------------------------------------------------

# Column names of the raw files, in file order
Header = ['Date', 'UT time', 'Temperature', 'Relative Humidity', 'Pressure', 'Wind Speed', 'Wind Direction', 'Rainfall',
          'Snowfall', 'Snow depth', 'Short-wave irradiation']

# Month names, used as column labels of every result table
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October',
          'November', 'December']

# Evaluation parameters: the column analysed and the daily aggregation applied to it
# (daily irradiation total, daily mean wind speed, daily maximum wind speed)
parameters = ['Short-wave irradiation', 'Wind Speed', 'Wind Speed']
daily_aggregations = ['sum', 'mean', 'max']

# Weight factor for each evaluation parameter
w_sum = 0.5
w_mean = 0.25
w_max = 0.25


def _normalised_cumsum(sorted_values):
    """Return the cumulative sum of ``sorted_values`` rescaled to the range [0, 1].

    NOTE(review): this is a normalised cumulative sum of the values, not a rank-based
    empirical CDF - confirm this is the intended "cumulative distribution".
    Raises ValueError when ``sorted_values`` is empty (no data for that month).
    """
    running = np.cumsum(sorted_values)  # computed once instead of once per use
    lowest = min(running)
    return (running - lowest) / (max(running) - lowest)


def _fs_statistic(long_term_x, long_term_y, month_x, month_y_values):
    """Return the mean absolute difference between a single month curve and the long-term curve.

    For every daily value of the month, the long-term curve is read at the first
    position holding that same value. Raises IndexError if a value of ``month_x``
    is not present in ``long_term_x``.
    """
    total = 0
    for position, value in enumerate(month_x):
        first_match = np.where(long_term_x == value)[0][0]
        total += abs(month_y_values[position] - long_term_y[first_match])
    return total / len(month_x)


def _as_object_array(sequences):
    """Pack arrays of unequal length into a 1-D object array, one array per element.

    ``np.array`` on such a ragged list raises ValueError on NumPy >= 1.24.
    """
    packed = np.empty(len(sequences), dtype=object)
    for k, item in enumerate(sequences):
        packed[k] = item
    return packed


# Resetting the accumulators so that re-running this cell does not double-count
W_total = 0
TMY_values_list = []

# Iterating over all geographical locations
for z, raw in enumerate(li):
    # Working on a copy so the loaded data in `li` is left untouched
    df = raw.copy()
    df.columns = Header

    # Parsing the dates only once and keeping just the columns needed for the analysis,
    # with the Year / Month / Day columns placed first
    dates = pd.DatetimeIndex(df['Date'])
    df = df[['Wind Speed', 'Short-wave irradiation']].copy()
    df.insert(0, 'Day', dates.day)
    df.insert(0, 'Month', dates.month)
    df.insert(0, 'Year', dates.year)

    # Dropping the rows related to the single day of January 2015
    df = df.loc[df['Year'] != 2015].copy()
    # Converting the Wh values into kWh
    df['Short-wave irradiation'] = df['Short-wave irradiation'] / 1000

    years = df['Year'].unique() # Years present in the data, in order of appearance
    size = len(years) * len(months) # Number of FS values per evaluation parameter

    LT_y = [] # Long-term curve of each (parameter, month)
    month_y = [] # Curve of each (parameter, month, year)
    FS_list = [] # FS factor of each (parameter, month, year)

    for parameter, how in zip(parameters, daily_aggregations):
        # Daily values of the parameter (sum, mean or max of the hourly values)
        df1 = df.groupby(['Year', 'Month', 'Day'], as_index=False).agg(how)

        for month_number in range(1, len(months) + 1):
            # Long-term curve: all years together for this calendar month
            long_term = df1.loc[df1['Month'] == month_number]
            x = np.sort(long_term[parameter].values)
            y = _normalised_cumsum(x)
            LT_y.append(y)

            for year in years:
                # Curve of this calendar month in a single year; the mask is built from
                # the subset itself rather than from the full daily table
                single_year = long_term.loc[long_term['Year'] == year]
                x1 = np.sort(single_year[parameter].values)
                y1 = _normalised_cumsum(x1)
                month_y.append(y1)

                # Finkelstein-Schafer (FS) statistics
                FS_list.append(_fs_statistic(x, y, x1, y1))

    # The curves have different lengths (months differ in number of days),
    # so they are stored as object arrays
    LT_y_array = _as_object_array(LT_y).reshape(len(parameters), len(months)) # columns=months, rows=parameters
    month_y_array = _as_object_array(month_y) # one entry per (parameter, month, year)

    # Arrays with the FS factor for each evaluation parameter (rows=years, columns=months)
    FS_matrix_sum = np.array(FS_list[:size]).reshape(len(months), len(years)).transpose()
    FS_matrix_mean = np.array(FS_list[size:(size * 2)]).reshape(len(months), len(years)).transpose()
    FS_matrix_max = np.array(FS_list[(size * 2):]).reshape(len(months), len(years)).transpose()

    # Creating dataframes from the arrays
    df_FS_sum = pd.DataFrame(data=FS_matrix_sum, index=years, columns=months)
    df_FS_mean = pd.DataFrame(data=FS_matrix_mean, index=years, columns=months)
    df_FS_max = pd.DataFrame(data=FS_matrix_max, index=years, columns=months)

    # Creating a matrix with the weighted sum, converting it to a dataframe and saving into a .csv file
    WS_matrix = w_sum * FS_matrix_sum + w_mean * FS_matrix_mean + w_max * FS_matrix_max
    WS_df = pd.DataFrame(data=WS_matrix, index=years, columns=months)
    WS_df.to_csv(os.path.join(path2, r'North_WS_point_{}.csv'.format(z+1)), header=True)

    # Typical Meteorological Year (TMY) for this point: for each month, the year with the
    # smallest weighted sum (stored as floats, as in the files produced so far)
    TMY_list = np.array([WS_df[month].idxmin() for month in months], dtype=float)
    # Plain Python floats keep the text output as e.g. 2005.0 on every NumPy version
    # (NumPy >= 2 would otherwise print np.float64(2005.0))
    TMY_best_i = dict(zip(months, TMY_list.tolist()))
    TMY_values_list.append(TMY_list)

    # Writing the dictionary composed of the best Typical Meteorological Year for each location in a text file
    f.write('Point {}:\n'.format(z + 1))
    f.write(str(TMY_best_i) + '\n\n')

    # Updating the WS total value by adding up the WS for each point
    W_total = W_total + WS_df

# Saving the global weighted sum dataframe into a .csv file
W_total.to_csv(os.path.join(path2, r'NORTH_W_total.csv'), header=True)

## <font color='purple'> Obtaining the compiled TMY based on a combination of all locations</font>

In [5]:
# The text file was opened in an earlier cell; the try/finally guarantees it is closed
# even if one of the steps below fails.
try:
    # Defining the global Typical Meteorological Year (TMY): for each month, the year with
    # the smallest total weighted sum over all locations (stored as floats, as before)
    TMY_list_best = np.array(
        [W_total.iloc[:, i].idxmin() for i in range(len(months))], dtype=float
    )
    # Plain Python floats keep the text output as e.g. 2005.0 on every NumPy version
    # (NumPy >= 2 would otherwise print np.float64(2005.0))
    TMY_best = dict(zip(months, TMY_list_best.tolist()))
    TMY_values_list.append(TMY_list_best)

    # Creating a dataframe with the TMY for each location and the global one (last row)
    TMY_df = pd.DataFrame(TMY_values_list, columns=months)
    # Saving the TMY dataframe to a .csv file
    TMY_df.to_csv('TMY_north.csv', header=True)

    # Adding the global TMY to the already opened .txt file
    f.write('General TMY:\n')
    f.write(str(TMY_best))
finally:
    f.close()