# Introduction to Jupyter Notebooks in VS Code

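## In this tutorial, we will cover

- Loading and merging the power and weather data with pandas
- Plotting data with Plotly Express
- Plotting with Plotly `graph_objects`
- Combining traces with subplots and a secondary y-axis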

In [1]:
import pandas as pd

# Load the plant's generation (power) and weather-sensor readings from CSV.
pwr = pd.read_csv('./data/Plant_2_Generation_Data.csv')
wtr = pd.read_csv('./data/Plant_2_Weather_Sensor_Data.csv')

# Parse the DATE_TIME column of both frames into real datetimes so that
# time-based operations (joins on the timestamp, resampling) work on it.
wtr['DATE_TIME'] = pd.to_datetime(wtr['DATE_TIME'])
pwr['DATE_TIME'] = pd.to_datetime(pwr['DATE_TIME'])

# Preview both frames. display() uses the notebook's rich table rendering;
# print() would emit a plain-text table that wraps wide frames across lines.
# The bare last expression is rendered as the cell's output.
display(pwr.head())
wtr.head()

   DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  DAILY_YIELD  \
0 2020-05-15   4136001  4UPUqMRk7TRMgml       0.0       0.0  9425.000000   
1 2020-05-15   4136001  81aHJ1q11NBPMrL       0.0       0.0     0.000000   
2 2020-05-15   4136001  9kRcWv60rDACzjR       0.0       0.0  3075.333333   
3 2020-05-15   4136001  Et9kgGMDl729KT4       0.0       0.0   269.933333   
4 2020-05-15   4136001  IQ2d7wF4YD8zU1Q       0.0       0.0  3177.000000   

    TOTAL_YIELD  
0  2.429011e+06  
1  1.215279e+09  
2  2.247720e+09  
3  1.704250e+06  
4  1.994153e+07  


Unnamed: 0,DATE_TIME,PLANT_ID,SOURCE_KEY,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION
0,2020-05-15 00:00:00,4136001,iq8k7ZNt4Mwm3w0,27.004764,25.060789,0.0
1,2020-05-15 00:15:00,4136001,iq8k7ZNt4Mwm3w0,26.880811,24.421869,0.0
2,2020-05-15 00:30:00,4136001,iq8k7ZNt4Mwm3w0,26.682055,24.42729,0.0
3,2020-05-15 00:45:00,4136001,iq8k7ZNt4Mwm3w0,26.500589,24.420678,0.0
4,2020-05-15 01:00:00,4136001,iq8k7ZNt4Mwm3w0,26.596148,25.08821,0.0


In [13]:
# Weather columns to attach to every power reading; DATE_TIME and PLANT_ID
# double as the join keys.
weather_columns = [
    'DATE_TIME',
    'PLANT_ID',
    'AMBIENT_TEMPERATURE',
    'MODULE_TEMPERATURE',
    'IRRADIATION',
]

# Join power and weather readings, then derive LOSS as the difference between
# DC and AC power for each row.
df = (
    pwr
    .merge(wtr[weather_columns], on=['DATE_TIME', 'PLANT_ID'])
    .assign(LOSS=lambda merged: merged['DC_POWER'] - merged['AC_POWER'])
)

# Per-inverter daily totals, shown as the cell output.
# NOTE(review): sum() is applied to every selected column, DAILY_YIELD
# included - confirm that adding up the interval-level DAILY_YIELD values is
# the intended daily figure.
daily_columns = ['DATE_TIME', 'DAILY_YIELD', 'IRRADIATION', 'AC_POWER', 'DC_POWER']
(
    df
    .groupby('SOURCE_KEY')[daily_columns]
    .resample('D', on='DATE_TIME')
    .sum()
)


Unnamed: 0_level_0,Unnamed: 1_level_0,DAILY_YIELD,IRRADIATION,AC_POWER,DC_POWER
SOURCE_KEY,DATE_TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4UPUqMRk7TRMgml,2020-05-15,244994.528571,28.559055,16744.078571,17112.654286
4UPUqMRk7TRMgml,2020-05-16,293516.438095,23.676573,22791.219048,23305.452381
4UPUqMRk7TRMgml,2020-05-17,323713.576190,21.233595,25430.422381,25985.950476
4UPUqMRk7TRMgml,2020-05-18,378880.690476,21.495780,30516.029524,31218.581429
4UPUqMRk7TRMgml,2020-05-19,130392.439286,20.345321,7878.389048,8039.996429
...,...,...,...,...,...
xoJJ8DcxJEcupym,2020-06-13,425788.400000,20.690174,26840.577619,27443.737619
xoJJ8DcxJEcupym,2020-06-14,534758.071429,22.033630,30088.969524,30768.782381
xoJJ8DcxJEcupym,2020-06-15,507797.661905,17.765047,25049.489524,25597.239048
xoJJ8DcxJEcupym,2020-06-16,427646.757143,15.845917,21870.634286,22335.691429


## Plotting data with Plotly Express

### Advantages:


In [14]:
import plotly.express as px

# Daily aggregates per inverter. Columns that get a list of statistics make
# the result's columns a MultiIndex, so series are selected below with
# (column, statistic) tuples.
daily = df.groupby('SOURCE_KEY').resample('D', on='DATE_TIME').agg({
    'DAILY_YIELD': 'sum',
    'IRRADIATION': 'sum',
    'AC_POWER': 'max',
    'DC_POWER': 'max',
    'MODULE_TEMPERATURE': ['mean', 'max'],
    'LOSS': ['median', 'max'],
})

# Scatter of daily maximum LOSS against daily summed IRRADIATION, one colour
# per inverter. The index is (SOURCE_KEY, DATE_TIME); dropping level 1 leaves
# the inverter key for each row.
# Because the data is passed as arrays rather than as columns of a data frame,
# Plotly Express would otherwise title the axes and legend "x", "y" and
# "color"; the labels mapping replaces those with meaningful names.
fig = px.scatter(
    x=daily[('IRRADIATION', 'sum')],
    y=daily[('LOSS', 'max')],
    color=daily.index.droplevel(1).values,
    opacity=0.5,
    labels={
        'x': 'IRRADIATION (daily sum)',
        'y': 'LOSS (daily max)',
        'color': 'SOURCE_KEY',
    },
)
fig.show()

## Plotting with Plotly `graph_objects`


In [20]:
import plotly.graph_objects as go

# Inverter to inspect and the measurement whose distribution is plotted.
SOURCE_KEY = '4UPUqMRk7TRMgml'
column = 'MODULE_TEMPERATURE'

# All rows of the merged frame that belong to the selected inverter.
inverter_raw = df.set_index('SOURCE_KEY').loc[SOURCE_KEY]
x = inverter_raw[column].values

# Histogram of the chosen column's raw values.
histogram = go.Histogram(x=x, opacity=0.8)
fig = go.Figure(data=[histogram])

# Figure title, with its font settings.
title_font = dict(
    family="Times New Roman",
    size=22,
    color="#030303",
)
title_text = "Inverter Distribution: {}".format(column)
fig.update_layout(title=go.layout.Title(text=title_text, font=title_font))

# Axis titles. The final call returns the figure, which the notebook renders
# as the cell's output.
fig.update_xaxes(title_text="{}".format(column))
fig.update_yaxes(title_text="<b>Count</b>")

## Plotting with subplots

In [21]:
from plotly.subplots import make_subplots

# Daily aggregates per inverter. Columns that get a list of statistics make
# the result's columns a MultiIndex, so series are selected below with
# (column, statistic) tuples.
daily = df.groupby('SOURCE_KEY').resample('D', on='DATE_TIME').agg({
    'DAILY_YIELD': 'sum',
    'IRRADIATION': 'sum',
    'AC_POWER': 'max',
    'DC_POWER': 'max',
    'MODULE_TEMPERATURE': ['mean', 'max'],
    'LOSS': ['max', 'sum'],
})

# Selecting with a list ([SOURCE_KEY]) instead of a scalar keeps the
# SOURCE_KEY index level, so reset_index() returns it as a regular column
# next to DATE_TIME and the column no longer has to be re-created by hand.
inverter = daily.loc[[SOURCE_KEY]].reset_index()

xvals = inverter['DATE_TIME']
y1 = inverter[('DAILY_YIELD', 'sum')]
y2 = inverter[('IRRADIATION', 'sum')]

# Both traces carry the same per-point metadata (inverter key and date).
point_metadata = inverter[['SOURCE_KEY', 'DATE_TIME']]

# Single plot with a secondary y-axis so the two series can use separate scales.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Bars on the primary axis: summed DAILY_YIELD per day.
fig.add_trace(
    go.Bar(x=xvals, y=y1, name="DAILY_YIELD", customdata=point_metadata),
    secondary_y=False,
)

# Line with markers on the secondary axis: summed IRRADIATION per day.
fig.add_trace(
    go.Scatter(x=xvals, y=y2, name="IRRADIANCE", mode='lines+markers', customdata=point_metadata),
    secondary_y=True,
)

# Figure title.
fig.update_layout(title=go.layout.Title(text="Daily Yield and Irradiance over Time", font=dict(
    family="Times New Roman",
    size=22,
    color="#030303"
)))

# x-axis title.
fig.update_xaxes(title_text="Date")

# y-axis titles, one per axis.
fig.update_yaxes(title_text="<b>Power (kW)</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Irradiance</b>", secondary_y=True)

# The final call returns the figure, which the notebook renders as the
# cell's output.
fig.update_layout(barmode='group')

## Even More Subplots

Plot a particular day's interval data

In [23]:
# Preview the first rows of df before slicing out a single day of readings.
df.head()

Unnamed: 0,DATE_TIME,PLANT_ID,SOURCE_KEY,DC_POWER,AC_POWER,DAILY_YIELD,TOTAL_YIELD,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION,LOSS
0,2020-05-15,4136001,4UPUqMRk7TRMgml,0.0,0.0,9425.0,2429011.0,27.004764,25.060789,0.0,0.0
1,2020-05-15,4136001,81aHJ1q11NBPMrL,0.0,0.0,0.0,1215279000.0,27.004764,25.060789,0.0,0.0
2,2020-05-15,4136001,9kRcWv60rDACzjR,0.0,0.0,3075.333333,2247720000.0,27.004764,25.060789,0.0,0.0
3,2020-05-15,4136001,Et9kgGMDl729KT4,0.0,0.0,269.933333,1704250.0,27.004764,25.060789,0.0,0.0
4,2020-05-15,4136001,IQ2d7wF4YD8zU1Q,0.0,0.0,3177.0,19941530.0,27.004764,25.060789,0.0,0.0


In [25]:
from datetime import datetime, timedelta
# Day to plot: `start` is midnight of that day, `end` is midnight of the next.
date = '2020-05-19'
start = datetime.strptime(date, '%Y-%m-%d')
end = start + timedelta(days=1)

# Interval-level rows of the selected inverter, indexed by DATE_TIME.
inverter_all = df.set_index(['SOURCE_KEY', 'DATE_TIME']).loc[SOURCE_KEY]

# Keep the half-open window [start, end). A label slice .loc[start:end]
# includes both endpoints, so a reading stamped exactly at `end` (midnight of
# the following day) would be plotted as part of this day. A boolean mask also
# does not depend on the index being sorted.
in_selected_day = (inverter_all.index >= start) & (inverter_all.index < end)
inverter_raw = inverter_all[in_selected_day]

# Figure with a secondary y-axis: power on the primary axis, irradiation on
# the secondary one.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Figure title.
fig.update_layout(title=go.layout.Title(text="Raw Power Output vs Irradiance", font=dict(
    family="Times New Roman",
    size=22,
    color="#030303",
)))

# Line chart for DC power.
fig.add_trace(
    go.Scatter(
        x=inverter_raw.index,
        y=inverter_raw['DC_POWER'],
        mode='lines',
        name='DC',
    ),
    secondary_y=False,
)

# Area chart for AC power (filled down to zero).
fig.add_trace(
    go.Scatter(
        x=inverter_raw.index,
        y=inverter_raw['AC_POWER'],
        fill='tozeroy',
        mode='lines',
        name='AC',
    ),
    secondary_y=False,
)

# Line chart for irradiation on the secondary axis.
fig.add_trace(
    go.Scatter(
        x=inverter_raw.index,
        y=inverter_raw['IRRADIATION'],
        mode='lines',
        name='IRRADIATION',
    ),
    secondary_y=True,
)

# Axis titles so the figure can be read on its own. The final call returns the
# figure, which the notebook renders as the cell's output.
fig.update_xaxes(title_text="Time")
fig.update_yaxes(title_text="<b>Power</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Irradiation</b>", secondary_y=True)