# Investigation of different modules

## Import statements

In [3]:
import pandas as pd
import numpy as np
import datetime
import os
import re
import matplotlib.pyplot as plt

## Folder settings and output path

In [5]:
# Folder that holds the raw per-module CSV exports.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
folder_data = '/Users/hkromer/01_Projects/10.SolarAnlage/01.Analytics/Moduldaten/'

# Output folder for the combined dataframe.
# os.path.join is used because folder_data already ends with a separator;
# plain string formatting produced a doubled '/'.
directory = os.path.join(folder_data, 'cleaned_data')
# exist_ok=True creates the folder only when it is missing, without a separate
# (race-prone) existence check.
os.makedirs(directory, exist_ok=True)

# Current time; its year, month and day form the prefix of the output file name.
currentDT = datetime.datetime.now()
filename_df_out = os.path.join(
    directory,
    f'{currentDT.year}-{currentDT.month}-{currentDT.day}_solarData.csv',
)

## Import data and combine into one dataframe

In [10]:
# Collect every CSV export in the data folder. Sorted so that the row order of
# the combined frame does not depend on the arbitrary order of os.listdir.
files = sorted(f for f in os.listdir(folder_data) if f.endswith('.csv'))
if not files:
    raise FileNotFoundError(f'No .csv files found in {folder_data}')

# Read all files, then concatenate once. DataFrame.append was removed in
# pandas 2.0, and appending inside the loop re-copied the growing frame on
# every iteration.
df = pd.concat(
    (pd.read_csv(os.path.join(folder_data, file)) for file in files),
    ignore_index=True,
    sort=False,
)

# Change columns to only contain the identifier: the first column name is kept
# as is, every other header is reduced to its '1.1.<n>' part (e.g. '1.1.12').
# The dots are escaped so that they only match a literal '.'.
id_pattern = re.compile(r'(1\.1\.\d+) E')
cols = df.columns

my_cols = [cols[0]]
for c in cols[1:]:
    match = id_pattern.search(c)
    if match is None:
        # Fail with the offending header instead of an opaque IndexError.
        raise ValueError(f'Column {c!r} does not contain a module identifier')
    my_cols.append(match.group(1))
id_cols = my_cols[1:]

df.columns = my_cols

print(df.head())
# df.to_csv(f'{filename_df_out}')

         Time   1.1.1   1.1.2   1.1.3   1.1.4   1.1.5   1.1.6   1.1.7   1.1.8  \
0  01.02.2018  505.25  245.50  476.50  476.00  484.25  470.50  480.00  480.50   
1  02.02.2018  283.00  208.50  258.25  250.00  240.50  217.75  238.75  248.00   
2  03.02.2018  236.00  183.75  213.25  211.25  216.25  199.00  217.25  217.75   
3  04.02.2018  692.75  217.00  646.75  643.50  533.25  340.50  600.25  633.75   
4  05.02.2018  612.25  301.25  578.50  572.75  555.25  486.00  547.00  569.75   

    1.1.9  ...  1.1.27  1.1.28  1.1.29  1.1.30  1.1.31  1.1.32  1.1.33  \
0  481.00  ...  293.50  254.50  287.50  286.25  306.50  303.50  302.50   
1  261.50  ...  250.25  197.00  231.75  236.50  219.75  214.50  209.00   
2  219.00  ...  188.25  154.50  183.25  183.25  191.75  189.00  186.00   
3  647.75  ...  544.25  514.00  482.00  472.75  334.25  339.75  333.50   
4  581.25  ...  391.25  345.25  356.50  357.25  332.25  331.75  326.75   

   1.1.34  1.1.35  1.1.36  
0  306.00  322.25   311.5  
1  214.75  2

## Change the index to time series

In [17]:
# Format of the raw 'Time' strings: day.month.year (e.g. 01.02.2018).
time_format = '%d.%m.%Y'

# Parse the 'Time' column and use it as the sorted index.
# The guard keeps the cell re-runnable: after the first execution 'Time' is the
# index, not a column, and the unguarded version raised a KeyError.
if 'Time' in df.columns:
    df['Time'] = pd.to_datetime(df['Time'], format=time_format)
    df = df.set_index('Time').sort_index()

# NOTE(review): the recorded output shows 500 rows for 2017-10-12 .. 2019-02-16,
# which spans only 493 calendar days, so some dates seem to occur more than
# once (overlapping export files?) — confirm before summing per month.

# info() prints its report itself and returns None, so wrapping it in print()
# only added a stray 'None' line to the output.
df.info()
print(df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2017-10-12 to 2019-02-16
Data columns (total 36 columns):
1.1.1     499 non-null float64
1.1.2     497 non-null float64
1.1.3     499 non-null float64
1.1.4     500 non-null float64
1.1.5     499 non-null float64
1.1.6     499 non-null float64
1.1.7     499 non-null float64
1.1.8     499 non-null float64
1.1.9     499 non-null float64
1.1.10    500 non-null float64
1.1.11    500 non-null float64
1.1.12    500 non-null float64
1.1.13    499 non-null float64
1.1.14    494 non-null float64
1.1.15    496 non-null float64
1.1.16    499 non-null float64
1.1.17    499 non-null float64
1.1.18    498 non-null float64
1.1.19    499 non-null float64
1.1.20    500 non-null float64
1.1.21    494 non-null float64
1.1.22    499 non-null float64
1.1.23    500 non-null float64
1.1.24    500 non-null float64
1.1.25    500 non-null float64
1.1.26    499 non-null float64
1.1.27    500 non-null float64
1.1.28    499 non-null float64
1.1.29  

## Relate modules to position (layout, "Auslegung")
The module-to-position layout is taken from the monitoring system.

In [36]:
# Module numbers per position, in the order they are listed for each position.
_position_numbers = {
    'Südwesten': (1, 16, 3, 4, 5, 10, 9, 8, 7, 6, 11, 12, 13, 14),
    'Westen': (15, 2, 18, 17, 19, 20),
    'Südost_oben': (28, 27, 26, 25, 24, 23, 22, 21, 29, 30),
    'Südost_unten': (31, 32, 33, 34, 35, 36),
}

# Map each position name to the list of its dataframe column labels ('1.1.<n>').
module_position = {
    position: ['1.1.' + str(number) for number in numbers]
    for position, numbers in _position_numbers.items()
}

# Preview the first rows of the columns assigned to the 'Südwesten' position.
suedwesten_columns = module_position['Südwesten']
suedwesten_preview = df.loc[:, suedwesten_columns].head()
print(suedwesten_preview)

              1.1.1   1.1.16    1.1.3    1.1.4    1.1.5   1.1.10    1.1.9  \
Time                                                                        
2017-10-12   208.00   205.50   203.75   204.75   207.00   202.75   204.25   
2017-10-13  1428.00  1420.50  1400.50  1410.50  1396.50  1423.00  1399.25   
2017-10-14  1386.50  1375.75  1361.00  1368.25  1356.50  1378.25  1358.25   
2017-10-15  1377.75  1372.00  1355.50  1362.00  1347.25  1373.00  1348.75   
2017-10-16  1326.25  1319.50  1302.50  1310.00  1295.50  1310.50  1298.50   

              1.1.8    1.1.7    1.1.6   1.1.11  1.1.12   1.1.13  1.1.14  
Time                                                                     
2017-10-12   204.50   205.00   203.75   207.25   201.5   207.25   202.5  
2017-10-13  1399.50  1336.25  1022.75  1429.50  1404.0  1398.25  1284.0  
2017-10-14  1358.00  1293.25   987.50  1388.00  1363.0  1359.25  1242.5  
2017-10-15  1348.25  1280.75   992.75  1375.75  1360.0  1351.50  1239.5  
2017-10-16  1296

# Plot Monthly aggregated energy per module position

- First resample to monthly data, aggregate by sum.

- Change the datetimeindex format to the long name for the month.

- Create a new dataframe with the aggregated data

In [59]:
# Monthly totals: group the rows by calendar month ('M') and add them up.
monthly_sums = df.resample('M').sum()

# Replace the datetime index with the full month name ('%B').
# NOTE(review): the label carries no year, so the same month of different years
# gets the same label (the recorded output lists October twice) — confirm this
# is intended.
month_labels = monthly_sums.index.strftime('%B')
df_monthly = monthly_sums.set_index(month_labels)

# print(df_monthly.loc[:, module_position['Südwesten']].sum(axis='columns'))
# print(df_monthly.head())

# Create a new dataframe with the aggregated data
for pos in module_position:
    g = df_monthly.loc[:, module_position[pos]].sum(axis='columns')
    _ = ax.

October      229564.75
November     149750.25
December     116781.50
January      130969.75
February     239422.00
March        334862.50
April        513220.50
May          486430.00
June         523340.25
July         594672.00
August       526507.50
September    492995.50
October      361423.00
November     208056.50
December     138444.00
January      115137.75
February     161246.25
dtype: float64
October       34555.25
November      24487.50
December      15512.75
January       24434.00
February      40355.75
March         80843.75
April        132034.50
May          161411.50
June         179793.75
July         193208.00
August       143939.50
September    103205.75
October       62221.75
November      27919.00
December      18976.50
January       16147.00
February      19492.25
dtype: float64
October       95638.50
November      71285.75
December      53566.75
January       65721.75
February     121047.00
March        195936.00
April        318257.75
May          311672.50
June