<a href="https://colab.research.google.com/github/priya170807/Time-seriesAnalysis/blob/main/transformingvalues.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler

In [2]:
# Location of the VM trace CSV (a Google Drive path as mounted inside Colab).
data_path = '/content/drive/MyDrive/vmtable.csv/vmtable.csv'

# The CSV is read with header=None, so the column names are supplied here.
headers = [
    'vmid', 'subscriptionid', 'deploymentid',
    'vmcreated', 'vmdeleted',
    'maxcpu', 'avgcpu', 'p95maxcpu',
    'vmcategory', 'vmcorecount', 'vmmemory',
]
trace_dataframe = pd.read_csv(
    data_path,
    sep=',',
    names=headers,
    header=None,
    index_col=False,
)

# VM lifetime: (vmdeleted - vmcreated), floored at 300, then divided by 3600
# to express it in hours.
trace_dataframe['lifetime'] = (
    np.maximum(trace_dataframe['vmdeleted'].sub(trace_dataframe['vmcreated']), 300) / 3600
)
# Core-hours: lifetime in hours multiplied by the VM's core count.
trace_dataframe['corehour'] = trace_dataframe['lifetime'].mul(trace_dataframe['vmcorecount'])
trace_dataframe.head()

Unnamed: 0,vmid,subscriptionid,deploymentid,vmcreated,vmdeleted,maxcpu,avgcpu,p95maxcpu,vmcategory,vmcorecount,vmmemory,lifetime,corehour
0,x/XsOfHO4ocsV99i4NluqKDuxctW2MMVmwqOPAlg4wp8mq...,VDU4C8cqdr+ORcqquwMRcsBA2l0SC6lCPys0wdghKROuxP...,Pc2VLB8aDxK2DCC96itq4vW/zVDp4wioAUiB3HoGSFYQ0o...,0,2591700,99.369869,3.424094,10.194309,Delay-insensitive,1,1.75,719.916667,719.916667
1,H5CxmMoVcZSpjgGbohnVA3R+7uCTe/hM2ht2uIYi3t7KwX...,BSXOcywx8pUU0DueDo6UMol1YzR6tn47KLEKaoXp0a1bf2...,3J17LcV4gXjFat62qhVFRfoiWArHnY763HVqqI6orJCfV8...,0,1539300,100.0,6.181784,33.98136,Interactive,1,0.75,427.583333,427.583333
2,wR/G1YUjpMP4zUbxGM/XJNhYS8cAK3SGKM2tqhF7VdeTUY...,VDU4C8cqdr+ORcqquwMRcsBA2l0SC6lCPys0wdghKROuxP...,Pc2VLB8aDxK2DCC96itq4vW/zVDp4wioAUiB3HoGSFYQ0o...,2188800,2591700,99.569027,3.573635,7.92425,Delay-insensitive,1,1.75,111.916667,111.916667
3,1XiU+KpvIa3T1XP8kk3ZY71Of03+ogFL5Pag9Mc2jBuh0Y...,8u+M3WcFp8pq183WoMB79PhK7xUzbaviOBv0qWN6Xn4mbu...,DHbeI+pYTYFjH8JAF8SewM0z/4SqQctvxcBRGIRglBmeLW...,0,2591700,99.405085,16.287611,95.69789,Delay-insensitive,8,56.0,719.916667,5759.333333
4,z5i2HiSaz6ZdLR6PXdnDjGva3jIlkMPXx23VtfXx9q3dXF...,VDU4C8cqdr+ORcqquwMRcsBA2l0SC6lCPys0wdghKROuxP...,Pc2VLB8aDxK2DCC96itq4vW/zVDp4wioAUiB3HoGSFYQ0o...,0,2188500,98.967961,3.036038,9.445484,Delay-insensitive,1,1.75,607.916667,607.916667


In [3]:
# Headline figures for the whole trace: row count, distinct subscriptions and
# deployments, and the summed lifetime / core-hour columns.
vm_count = len(trace_dataframe.index)
subscription_count = len(trace_dataframe['subscriptionid'].unique())
deployment_count = len(trace_dataframe['deploymentid'].unique())
total_vm_hour_available = trace_dataframe['lifetime'].sum()
total_core_hour_available = trace_dataframe['corehour'].sum()

# Each entry pairs a %-format template with the value it reports.
for summary_template, summary_value in (
    ("Total Number of Virtual Machines in the Dataset: %d", vm_count),
    ("Total Number of Subscriptions in the Dataset: %d", subscription_count),
    ("Total Number of Deployments in the Dataset: %d", deployment_count),
    ("Total VM Hours Available in the Dataset: %f", total_vm_hour_available),
    ("Total Core Hours Available in the Dataset: %f", total_core_hour_available),
):
    print(summary_template % summary_value)

Total Number of Virtual Machines in the Dataset: 2013767
Total Number of Subscriptions in the Dataset: 5958
Total Number of Deployments in the Dataset: 35941
Total VM Hours Available in the Dataset: 104371713.416667
Total Core Hours Available in the Dataset: 237815104.750000


In [4]:
trace_dataframe["subscriptionid"].value_counts()

+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ESAgTaUXcM091m1HeTX7    128047
1pvP5oaK47WSSY0IZRNEQYdTLEx79rf7Gj1isBYW1jDOFGZXLQGTa0V3XnCrLrkB    111509
IBRuELx83WZHD8ZBmRnQ7nN53DxcMPAO7szqGt218k7STW7rx0pjgjj5eLJ0FLbn     93427
BShs5OvpbfrccmXj7X4MwSxkSFVNdSOzhYaDEKCijpvxWWkNByokK6c31F1uCT7R     85874
8aRytjOt2E+dixkPugZHbKFROou3eQLywft928DTtFP2o3QzFTIxYQ+8r0kdkzvo     68490
                                                                     ...  
7AVNVlxv7y/yGw3QFxy7p19iY5Hbu8lFfQG/Tdovki65yd4tDO8yxTkFwYJarae2         1
UtmlApxRj7c5EH9gxvooY+x2UM6phkXNDv0zqaixrF0c4dL1ARq4NXCHxQViuv3i         1
4PenY5eh1jaPofHfCxw8MdHRMrzogX9GKcWHX35fLNScIYW5R1PAWtu8mlKObrEj         1
mvuePJXPG5a6iQHH6SxgTUZ7DZNyFnhau8yzstUC2iA6X3ce61uGU6J2elgUb8rh         1
29tEX0Pdwi9ziDjq1hxGvJZno8vQkDK+VlL4J1i91A/O3XzZcyjr9TWOIEskifH8         1
Name: subscriptionid, Length: 5958, dtype: int64

In [5]:
oneVM_data = trace_dataframe[trace_dataframe["subscriptionid"] == "+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ESAgTaUXcM091m1HeTX7"]

In [6]:
oneVM_data.head()

Unnamed: 0,vmid,subscriptionid,deploymentid,vmcreated,vmdeleted,maxcpu,avgcpu,p95maxcpu,vmcategory,vmcorecount,vmmemory,lifetime,corehour
87435,kIq+LToYe/oJmpZJCT6aYfxRh252I4Bd8FQQIyaLLAST5A...,+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ES...,TSRTTdb9LRjgp+FpJYUBXBczOvLJLO5ksIDZm6OFgtN4Sa...,2305800,2310300,66.909246,2.900874,66.909246,Delay-insensitive,8,14.0,1.25,10.0
87436,T5Hgk7CUx9Z/ANIZ+3TIHran3IGgr4RcUoxGNMmWB5+96a...,+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ES...,TSRTTdb9LRjgp+FpJYUBXBczOvLJLO5ksIDZm6OFgtN4Sa...,2305800,2310300,6.510768,2.087024,6.510768,Delay-insensitive,8,14.0,1.25,10.0
87437,Omelc/JAGTVT51lNOObucJUr93TlFPNhEhyMeMRWrggtoK...,+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ES...,TSRTTdb9LRjgp+FpJYUBXBczOvLJLO5ksIDZm6OFgtN4Sa...,2238900,2244000,66.956558,3.139114,66.956558,Unkown,8,14.0,1.416667,11.333333
87438,Xrz83UV+7HsyDhOSClTCpitt73f4NVIqxBJIoXgNVOSiCW...,+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ES...,TSRTTdb9LRjgp+FpJYUBXBczOvLJLO5ksIDZm6OFgtN4Sa...,1103700,1104000,98.79578,27.330195,98.79578,Delay-insensitive,1,1.75,0.083333,0.083333
87439,BFJu/lrNsLRY1anlsPB7Ii3cPuz9pavIuk72SKjTo50qyf...,+9OPyI+/Eeu5PSXVMdkPw3cB99+uk+YiAwMRGJU1cDm2ES...,TSRTTdb9LRjgp+FpJYUBXBczOvLJLO5ksIDZm6OFgtN4Sa...,2484600,2505600,6.472197,1.68609,5.768269,Delay-insensitive,8,14.0,5.833333,46.666667


In [7]:
len(oneVM_data)

128047

In [8]:
# Renumber the filtered rows from 0 (the old index is dropped) so the label
# slice 0:8039 taken in the next cell selects the first 8040 rows.
oneVM_data = oneVM_data.reset_index(drop=True)

In [9]:
yearly_data = oneVM_data.loc[0:8039]

In [10]:
from datetime import datetime

# One synthetic hourly timestamp per row of the 8040-row slice in yearly_data,
# starting at 2020-01-01 00:00. The lowercase 'h' alias is used because the
# uppercase 'H' spelling is deprecated in recent pandas releases.
timestamp = pd.date_range('2020-01-01', periods=8040, freq='h')

In [11]:
time_stamp = pd.Series(timestamp)

In [288]:
time_stamp

0      2020-01-01 00:00:00
1      2020-01-01 01:00:00
2      2020-01-01 02:00:00
3      2020-01-01 03:00:00
4      2020-01-01 04:00:00
               ...        
8035   2020-11-30 19:00:00
8036   2020-11-30 20:00:00
8037   2020-11-30 21:00:00
8038   2020-11-30 22:00:00
8039   2020-11-30 23:00:00
Length: 8040, dtype: datetime64[ns]

In [12]:
useful_data = yearly_data[["maxcpu","lifetime","corehour"]]

In [13]:
# assign() builds a new DataFrame with the extra column instead of writing into
# a frame derived from a slice; the plain item assignment used here before is
# what produced pandas' SettingWithCopyWarning. Rows are matched on the index
# (both sides are numbered 0..8039).
useful_data = useful_data.assign(timestamp=time_stamp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [14]:
useful_data.head()

Unnamed: 0,maxcpu,lifetime,corehour,timestamp
0,66.909246,1.25,10.0,2020-01-01 00:00:00
1,6.510768,1.25,10.0,2020-01-01 01:00:00
2,66.956558,1.416667,11.333333,2020-01-01 02:00:00
3,98.79578,0.083333,0.083333,2020-01-01 03:00:00
4,6.472197,5.833333,46.666667,2020-01-01 04:00:00


In [15]:
len(useful_data)

8040

In [16]:
useful_data.tail(30)

Unnamed: 0,maxcpu,lifetime,corehour,timestamp
8010,81.415642,0.5,1.0,2020-11-29 18:00:00
8011,97.221776,2.666667,5.333333,2020-11-29 19:00:00
8012,78.745785,0.833333,1.666667,2020-11-29 20:00:00
8013,45.376215,0.166667,0.333333,2020-11-29 21:00:00
8014,94.138121,0.916667,1.833333,2020-11-29 22:00:00
8015,37.655799,0.25,0.5,2020-11-29 23:00:00
8016,4.246782,0.916667,7.333333,2020-11-30 00:00:00
8017,48.743738,2.416667,4.833333,2020-11-30 01:00:00
8018,67.917669,0.416667,0.833333,2020-11-30 02:00:00
8019,41.664353,0.5,1.0,2020-11-30 03:00:00


In [18]:
useful_cpu = useful_data.copy()

In [23]:
maxcpu_column = useful_cpu["maxcpu"].sort_values()

In [37]:
maxcpu_column = maxcpu_column.reset_index(drop=True)

In [38]:
maxcpu_column

0         0.826642
1         1.392564
2         1.395871
3         1.408902
4         1.437892
           ...    
8035    100.000000
8036    100.000000
8037    100.000000
8038    100.000000
8039    100.000000
Name: maxcpu, Length: 8040, dtype: float64

In [39]:
maxcpu_column_values = maxcpu_column.values

In [40]:
maxcpu_column_values


array([  0.826642,   1.392564,   1.395871, ..., 100.      , 100.      ,
       100.      ])

In [51]:
def calculate_percentile_index(p, arr=None):
  """Return the p-th percentile of an array and the index where it sits.

  Args:
    p: percentile to compute, in the range 0-100.
    arr: array to analyse. When omitted, the module-level
      maxcpu_column_values array is used, which keeps existing one-argument
      calls working. Callers in this notebook pass sorted arrays, so the
      returned index doubles as "number of values below the percentile".

  Returns:
    (pcen, index_near): the percentile chosen with the 'nearest' rule (an
    actual element of the array) and the index of the first element whose
    distance to that value is smallest.
  """
  x = np.asarray(maxcpu_column_values if arr is None else arr)
  try:
    pcen = np.percentile(x, p, method='nearest')
  except TypeError:
    # NumPy releases before 1.22 only accept the keyword under its old name.
    pcen = np.percentile(x, p, interpolation='nearest')
  # argmin returns the first position on ties, e.g. the first of several
  # equal maxima when p=100.
  index_near = np.abs(x - pcen).argmin()
  return pcen, index_near


In [52]:
p25, index25 = calculate_percentile_index(p=25)

In [53]:
print("the value of the 25th percentile ={}".format(p25))
print("the value of the index of 25th percentile ={}".format(index25))

the value of the 25th percentile =60.90824
the value of the index of 25th percentile =2010


In [46]:
#maxcpu_column.loc[0:2010]

In [49]:
# Mean of the sorted values lying below the 25th-percentile position.
# index25 is the index returned by calculate_percentile_index(p=25); using it
# avoids retyping the row number (2010) from the printed output.
maxcpu_column_values_25mean = np.mean(maxcpu_column_values[0:index25])

In [50]:
maxcpu_column_values_25mean

33.652115095024875

In [54]:
# calculate the 50th percentile of the maxcpu_column_values
p50, index50 = calculate_percentile_index(p=50)

In [55]:
print("the value of the 50th percentile ={}".format(p50))
print("the value of the index of 50th percentile ={}".format(index50))

the value of the 50th percentile =82.566898
the value of the index of 50th percentile =4020


In [56]:
maxcpu_column.loc[0:4020]

0        0.826642
1        1.392564
2        1.395871
3        1.408902
4        1.437892
          ...    
4016    82.552179
4017    82.554134
4018    82.561179
4019    82.562935
4020    82.566898
Name: maxcpu, Length: 4021, dtype: float64

In [58]:
# Mean of the sorted values lying below the 50th-percentile position; index50
# comes from calculate_percentile_index(p=50) instead of a hand-copied 4020.
maxcpu_column_values_50mean = np.mean(maxcpu_column_values[0:index50])

In [59]:
maxcpu_column_values_50mean

53.809987041542286

In [62]:
# calculate the 75th percentile of the maxcpu_column_values
p75, index75 = calculate_percentile_index(p=75)

In [63]:
print("the value of the 75th percentile ={}".format(p75))
print("the value of the index of 75th percentile ={}".format(index75))

the value of the 75th percentile =95.508126
the value of the index of 75th percentile =6029


In [64]:
maxcpu_column.loc[0:6029]

0        0.826642
1        1.392564
2        1.395871
3        1.408902
4        1.437892
          ...    
6025    95.474004
6026    95.479122
6027    95.479974
6028    95.491742
6029    95.508126
Name: maxcpu, Length: 6030, dtype: float64

In [65]:
# Mean of the sorted values lying below the 75th-percentile position; index75
# comes from calculate_percentile_index(p=75) instead of a hand-copied 6029.
maxcpu_column_values_75mean = np.mean(maxcpu_column_values[0:index75])

In [66]:
maxcpu_column_values_75mean

65.63750630204014

In [67]:
# calculate the 100th percentile of the maxcpu_column_values
p100, index100 = calculate_percentile_index(p=100)

In [68]:
print("the value of the 100th percentile ={}".format(p100))
print("the value of the index of 100th percentile ={}".format(index100))

the value of the 100th percentile =100.0
the value of the index of 100th percentile =7947


In [69]:
maxcpu_column.loc[0:7947]

0         0.826642
1         1.392564
2         1.395871
3         1.408902
4         1.437892
           ...    
7943     99.988707
7944     99.991886
7945     99.991926
7946     99.994193
7947    100.000000
Name: maxcpu, Length: 7948, dtype: float64

In [71]:
# Mean of the sorted values up to and including position index100 (the
# hand-copied slice end was 7948, i.e. index100 + 1).
# NOTE(review): unlike the 25/50/75 cells this slice includes the element at
# the percentile index, and because index100 is the FIRST occurrence of the
# maximum, later rows that also hold the maximum are left out of the mean.
# Confirm whether the mean of the whole array was intended.
maxcpu_column_values_100mean = np.mean(maxcpu_column_values[0:index100 + 1])

In [72]:
maxcpu_column_values_100mean

73.44893062883745

In [76]:
# Create the input_stats dataframe holding one mean per percentile bucket.
# The means are taken from the variables computed in the cells above rather
# than retyped from their printed output, so they stay in sync with the data.
input_stats = pd.DataFrame({
    'mean_stats': [
        maxcpu_column_values_25mean,
        maxcpu_column_values_50mean,
        maxcpu_column_values_75mean,
        maxcpu_column_values_100mean,
    ],
    'index': ['25%', '50%', '75%', '100%'],
})

In [78]:
input_stats = input_stats.set_index('index')

In [79]:
input_stats

Unnamed: 0_level_0,mean_stats
index,Unnamed: 1_level_1
25%,33.652115
50%,53.809987
75%,65.637506
100%,73.448931


In [86]:
input_stats.loc["25%"].values[0]

33.652115095024875

In [None]:
# lets first check the input data.
# modify the input of last 24 values with the above means

In [87]:
# Read the last 24 values of the input data
input_cpu = useful_cpu[["timestamp","maxcpu"]][-24:].copy()

In [93]:
input_cpu_copy = input_cpu["maxcpu"].copy()

In [94]:
input_cpu_copy_values = input_cpu_copy.values

In [95]:
input_cpu_copy_values

array([ 4.246782, 48.743738, 67.917669, 41.664353, 59.243742,  5.758188,
       83.519528, 65.562367, 91.85695 , 87.673224, 45.630474, 69.279224,
       99.016576, 99.643539, 99.11266 , 81.81857 , 72.446938, 98.868372,
       12.313078, 89.453653, 98.841736, 70.042481, 47.881668, 80.857566])

In [91]:
def transform_input_data(arr):
  """Snap every element of arr to one of four bucket levels, in place.

  The levels are read from the module-level input_stats DataFrame (rows
  "25%", "50%", "75%", "100%", first column). A value at or below the 25%
  level becomes the 25% level; otherwise a value at or below the 50% level
  becomes the 50% level; otherwise a value at or below the 75% level becomes
  the 75% level; everything else becomes the 100% level.

  Args:
    arr: mutable numeric sequence (the notebook passes a 1-D NumPy array).
      It is overwritten element by element.

  Returns:
    The same arr object after modification.
  """
  # Look the four levels up once instead of repeating the pandas label lookup
  # on every loop iteration.
  level_25 = input_stats.loc["25%"].values[0]
  level_50 = input_stats.loc["50%"].values[0]
  level_75 = input_stats.loc["75%"].values[0]
  level_100 = input_stats.loc["100%"].values[0]
  for i in range(len(arr)):
    value = arr[i]
    # Each branch is reached only when the previous upper bound was exceeded,
    # so no explicit lower-bound comparison is needed.
    if value <= level_25:
      arr[i] = level_25
    elif value <= level_50:
      arr[i] = level_50
    elif value <= level_75:
      arr[i] = level_75
    else:
      arr[i] = level_100
  return arr


In [96]:
input_cpu_transformed = transform_input_data(input_cpu_copy_values)

In [277]:
input_cpu_transformed_series = pd.Series(input_cpu_transformed)

In [281]:
input_time = useful_cpu["timestamp"][-24:].copy()

In [282]:
input_time = input_time.reset_index(drop=True)

In [283]:
input_time

0    2020-11-30 00:00:00
1    2020-11-30 01:00:00
2    2020-11-30 02:00:00
3    2020-11-30 03:00:00
4    2020-11-30 04:00:00
5    2020-11-30 05:00:00
6    2020-11-30 06:00:00
7    2020-11-30 07:00:00
8    2020-11-30 08:00:00
9    2020-11-30 09:00:00
10   2020-11-30 10:00:00
11   2020-11-30 11:00:00
12   2020-11-30 12:00:00
13   2020-11-30 13:00:00
14   2020-11-30 14:00:00
15   2020-11-30 15:00:00
16   2020-11-30 16:00:00
17   2020-11-30 17:00:00
18   2020-11-30 18:00:00
19   2020-11-30 19:00:00
20   2020-11-30 20:00:00
21   2020-11-30 21:00:00
22   2020-11-30 22:00:00
23   2020-11-30 23:00:00
Name: timestamp, dtype: datetime64[ns]

In [284]:
# Pair the last 24 timestamps with the bucketed input values.
# NOTE(review): the value column is labelled "avgcpu", but the series was
# derived from the "maxcpu" column of useful_cpu — confirm the label is what
# consumers of the exported CSV expect.
transformed_input_df=pd.DataFrame({"timestamp":input_time,"avgcpu":input_cpu_transformed_series})

In [285]:
transformed_input_df

Unnamed: 0,timestamp,avgcpu
0,2020-11-30 00:00:00,33.652115
1,2020-11-30 01:00:00,53.809987
2,2020-11-30 02:00:00,73.448931
3,2020-11-30 03:00:00,53.809987
4,2020-11-30 04:00:00,65.637506
5,2020-11-30 05:00:00,33.652115
6,2020-11-30 06:00:00,73.448931
7,2020-11-30 07:00:00,65.637506
8,2020-11-30 08:00:00,73.448931
9,2020-11-30 09:00:00,73.448931


In [287]:
transformed_input_df.to_csv("input_AzureDataset.csv")

In [98]:
# now lets first check the yesterdays prediction of Lstm data
# Read the whole array into a variable initially

In [100]:
customLSTM_yesterday = np.array([[ 75.3249   , -23.565742 , -20.754543 ],
       [ 78.017166 , -18.211147 ,  18.693228 ],
       [ 76.05071  ,  -2.4042587,  22.222197 ],
       [ 64.03124  , -70.41094  , -44.136917 ],
       [ 63.736504 , -28.80771  ,   1.5063936],
       [ 79.0143   ,  11.332445 ,  -9.998157 ],
       [ 82.42909  , -45.03239  , -25.680084 ],
       [ 79.74652  ,  18.796913 , -15.7356825],
       [ 82.716484 ,  85.623695 , 105.46996  ],
       [ 73.97125  ,  -4.53773  , -63.30759  ],
       [ 76.041214 ,  27.282532 ,  28.621002 ],
       [ 81.09754  ,  59.0143   ,  20.101597 ],
       [ 72.990486 ,  55.772797 , -12.558001 ],
       [ 93.20396  ,   1.150991 ,  85.84287  ],
       [ 75.92985  , -16.914867 ,   4.3478045],
       [ 41.604794 ,  29.650373 , 102.099205 ],
       [ 56.317062 ,  41.239662 , 113.137695 ],
       [ 69.037895 ,  36.094765 ,  37.18707  ],
       [ 77.478806 ,  26.347729 , -31.292648 ],
       [ 85.93437  ,  64.37104  ,  44.874645 ],
       [ 76.75248  ,   1.6567384, -92.450554 ],
       [ 80.3425   ,  77.25768  , -44.065052 ],
       [ 80.51585  ,  62.773888 , -18.150185 ],
       [ 80.65257  ,  13.771081 ,   4.0431757]], dtype='float32')

In [102]:
customLSTM_yesterday_maxcpu = customLSTM_yesterday[:,0].copy()

In [104]:
# sort the array values to find the percentile values.
customLSTM_yesterday_maxcpu_sorted = np.sort(customLSTM_yesterday_maxcpu)

In [107]:
# Helper used to locate each percentile in yesterday's sorted LSTM output.
# This redefines the earlier single-argument helper so that the array is
# passed explicitly.
def calculate_percentile_index(p,arr):
  """Return the p-th percentile of arr and the index where it sits.

  Args:
    p: percentile to compute, in the range 0-100.
    arr: array to analyse; callers in this notebook pass sorted arrays.

  Returns:
    (pcen, index_near): the percentile chosen with the 'nearest' rule (an
    actual element of arr) and the index of the first element whose distance
    to that value is smallest.
  """
  x = arr
  try:
    pcen = np.percentile(x, p, method='nearest')
  except TypeError:
    # NumPy releases before 1.22 only accept the keyword under its old name.
    pcen = np.percentile(x, p, interpolation='nearest')
  index_near = abs(x - pcen).argmin()
  return pcen, index_near


In [108]:
lstm_y_p25, lstm_y_index25 = calculate_percentile_index(p=25, arr=customLSTM_yesterday_maxcpu_sorted)

In [109]:
print("the value of the 25th percentile ={}".format(lstm_y_p25))
print("the value of the index of 25th percentile ={}".format(lstm_y_index25))

the value of the 25th percentile =73.97125244140625
the value of the index of 25th percentile =6


In [110]:
lstm_y_p50, lstm_y_index50 = calculate_percentile_index(p=50, arr=customLSTM_yesterday_maxcpu_sorted)

In [113]:
print("the value of the 50th percentile ={}".format(lstm_y_p50))
print("the value of the index of 50th percentile ={}".format(lstm_y_index50))

the value of the 50th percentile =77.47880554199219
the value of the index of 50th percentile =12


In [112]:
lstm_y_p75, lstm_y_index75 = calculate_percentile_index(p=75, arr=customLSTM_yesterday_maxcpu_sorted)

In [114]:
print("the value of the 75th percentile ={}".format(lstm_y_p75))
print("the value of the index of 75th percentile ={}".format(lstm_y_index75))

the value of the 75th percentile =80.5158462524414
the value of the index of 75th percentile =17


In [115]:
lstm_y_p100, lstm_y_index100 = calculate_percentile_index(p=100, arr=customLSTM_yesterday_maxcpu_sorted)

In [116]:
print("the value of the 100th percentile ={}".format(lstm_y_p100))
print("the value of the index of 100th percentile ={}".format(lstm_y_index100))

the value of the 100th percentile =93.2039566040039
the value of the index of 100th percentile =23


In [117]:
customLSTM_yesterday_maxcpu_sorted[0:6]

array([41.604794, 56.317062, 63.736504, 64.03124 , 69.037895, 72.990486],
      dtype=float32)

In [118]:
# Calculate Mean of the values lying below the 25 percentile.
# lstm_y_index25 is the index found by calculate_percentile_index above; it
# replaces the slice end (6) that was copied by hand from the printed output.
lstm_yesterday_values_25mean = np.mean(customLSTM_yesterday_maxcpu_sorted[0:lstm_y_index25])

In [119]:
lstm_yesterday_values_25mean

61.28633

In [120]:
# Calculate Mean of the values lying below the 50 percentile.
# lstm_y_index50 replaces the hand-copied slice end (12).
lstm_yesterday_values_50mean = np.mean(customLSTM_yesterday_maxcpu_sorted[0:lstm_y_index50])

In [121]:
lstm_yesterday_values_50mean

68.48237

In [122]:
# Calculate Mean of the values lying below the 75 percentile.
# lstm_y_index75 replaces the hand-copied slice end (17).
lstm_yesterday_values_75mean = np.mean(customLSTM_yesterday_maxcpu_sorted[0:lstm_y_index75])

In [123]:
lstm_yesterday_values_75mean

71.552216

In [124]:
# Calculate Mean of the values lying below the 100 percentile.
# lstm_y_index100 replaces the hand-copied slice end (23). The slice stops
# before that index, so the maximum itself is not part of this mean.
lstm_yesterday_values_100mean = np.mean(customLSTM_yesterday_maxcpu_sorted[0:lstm_y_index100])

In [125]:
lstm_yesterday_values_100mean

74.33624

In [126]:
# Create the yesterday-stats dataframe holding one mean per percentile bucket.
# The means come from the variables computed in the cells above instead of
# being retyped from their rounded printed output. They are NumPy float32
# scalars, so the column keeps their full float32 precision.
lstm_yesterday_stats = pd.DataFrame({
    'mean_stats': [
        lstm_yesterday_values_25mean,
        lstm_yesterday_values_50mean,
        lstm_yesterday_values_75mean,
        lstm_yesterday_values_100mean,
    ],
    'index': ['25%', '50%', '75%', '100%'],
})

In [128]:
lstm_yesterday_stats = lstm_yesterday_stats.set_index("index")

In [129]:
lstm_yesterday_stats

Unnamed: 0_level_0,mean_stats
index,Unnamed: 1_level_1
25%,61.28633
50%,68.48237
75%,71.552216
100%,74.33624


In [131]:
customLSTM_yesterday_maxcpu_copy = customLSTM_yesterday_maxcpu[-24:].copy()

In [135]:
customLSTM_yesterday_maxcpu_copy

array([75.3249  , 78.017166, 76.05071 , 64.03124 , 63.736504, 79.0143  ,
       82.42909 , 79.74652 , 82.716484, 73.97125 , 76.041214, 81.09754 ,
       72.990486, 93.20396 , 75.92985 , 41.604794, 56.317062, 69.037895,
       77.478806, 85.93437 , 76.75248 , 80.3425  , 80.51585 , 80.65257 ],
      dtype=float32)

In [134]:
customLSTM_yesterday_maxcpu_copy_values = customLSTM_yesterday_maxcpu_copy.copy()

In [136]:
# Bucket an array according to a stats table (used here with lstm_yesterday_stats).
# This redefines the earlier one-argument helper so the stats table is passed in.
def transform_input_data(arr, input_stats):
  """Snap every element of arr to one of four bucket levels, in place.

  A value at or below the 25% level becomes the 25% level; otherwise a value
  at or below the 50% level becomes the 50% level; otherwise a value at or
  below the 75% level becomes the 75% level; everything else becomes the
  100% level.

  Args:
    arr: mutable numeric sequence (the notebook passes 1-D NumPy arrays).
      It is overwritten element by element.
    input_stats: DataFrame indexed by "25%", "50%", "75%" and "100%" whose
      first column holds the level for each bucket.

  Returns:
    The same arr object after modification.
  """
  # Look the four levels up once instead of repeating the pandas label lookup
  # on every loop iteration.
  level_25 = input_stats.loc["25%"].values[0]
  level_50 = input_stats.loc["50%"].values[0]
  level_75 = input_stats.loc["75%"].values[0]
  level_100 = input_stats.loc["100%"].values[0]
  for i in range(len(arr)):
    value = arr[i]
    # Each branch is reached only when the previous upper bound was exceeded,
    # so no explicit lower-bound comparison is needed.
    if value <= level_25:
      arr[i] = level_25
    elif value <= level_50:
      arr[i] = level_50
    elif value <= level_75:
      arr[i] = level_75
    else:
      arr[i] = level_100
  return arr


In [137]:
transform_lstm_yesterday_data = transform_input_data(customLSTM_yesterday_maxcpu_copy_values,lstm_yesterday_stats)

In [138]:
transform_lstm_yesterday_data

array([74.33624 , 74.33624 , 74.33624 , 68.48237 , 68.48237 , 74.33624 ,
       74.33624 , 74.33624 , 74.33624 , 74.33624 , 74.33624 , 74.33624 ,
       74.33624 , 74.33624 , 74.33624 , 61.28633 , 61.28633 , 71.552216,
       74.33624 , 74.33624 , 74.33624 , 74.33624 , 74.33624 , 74.33624 ],
      dtype=float32)

In [139]:
!pip install pygal

Collecting pygal
[?25l  Downloading https://files.pythonhosted.org/packages/5f/b7/201c9254ac0d2b8ffa3bb2d528d23a4130876d9ba90bc28e99633f323f17/pygal-2.4.0-py2.py3-none-any.whl (127kB)
[K     |██▋                             | 10kB 12.8MB/s eta 0:00:01[K     |█████▏                          | 20kB 9.4MB/s eta 0:00:01[K     |███████▊                        | 30kB 6.7MB/s eta 0:00:01[K     |██████████▎                     | 40kB 3.1MB/s eta 0:00:01[K     |████████████▉                   | 51kB 3.7MB/s eta 0:00:01[K     |███████████████▍                | 61kB 4.4MB/s eta 0:00:01[K     |██████████████████              | 71kB 4.5MB/s eta 0:00:01[K     |████████████████████▌           | 81kB 4.9MB/s eta 0:00:01[K     |███████████████████████▏        | 92kB 5.2MB/s eta 0:00:01[K     |█████████████████████████▊      | 102kB 5.4MB/s eta 0:00:01[K     |████████████████████████████▎   | 112kB 5.4MB/s eta 0:00:01[K     |██████████████████████████████▉ | 122kB 5.4MB/s eta 0:

In [140]:
import pygal as pg

In [141]:
from IPython.display import display, HTML

# HTML page used to embed a rendered pygal chart in the notebook output; the
# {rendered_chart} placeholder is filled in with str.format().
# Both helper scripts are loaded over https (a page served over https blocks
# plain-http scripts), and the stray extra double quote that followed the
# second src attribute has been removed.
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

In [185]:
# Bar chart of the bucketed input values, one bar per timestamp of the last
# 24 input rows.
TIME = input_cpu["timestamp"].copy()

bar_chart = pg.Bar()
bar_chart.title = 'CPUUtilization prediction'
bar_chart.x_labels = map(str, TIME)
bar_chart.add('INPUT', input_cpu_transformed)
# Optional extra series, currently disabled:
#bar_chart.add('customLSTMyesterday',transform_lstm_yesterday_data)
#bar_chart.add('customLSTMtoday',transformed_output_lstm)

# Render the chart to an SVG string and drop it into the HTML template.
rendered_input_chart = bar_chart.render(is_unicode=True)
display(HTML(base_html.format(rendered_chart=rendered_input_chart)))

In [257]:
input_cpu_copy_values

array([33.6521151 , 53.80998704, 73.44893063, 53.80998704, 65.6375063 ,
       33.6521151 , 73.44893063, 65.6375063 , 73.44893063, 73.44893063,
       53.80998704, 73.44893063, 73.44893063, 73.44893063, 73.44893063,
       73.44893063, 73.44893063, 73.44893063, 33.6521151 , 73.44893063,
       73.44893063, 73.44893063, 53.80998704, 73.44893063])

In [221]:
# get the data for LSTM today predictions
customLSTM_today = np.array([[  76.56823  ,   66.81293  ,  -18.840538 ],
       [  76.71233  ,   18.132448 ,    3.9207826],
       [  82.47301  ,   47.35854  ,   -5.9517527],
       [  85.206856 ,    0.5905725,  -29.64117  ],
       [  87.00391  ,   27.48851  ,   47.216633 ],
       [  81.321815 ,   20.338324 ,  -56.272827 ],
       [  71.95327  ,   11.22736  ,   -3.8976557],
       [  68.79478  ,  -49.0955   ,  -47.8816   ],
       [  71.26939  ,   28.504831 ,  -20.327747 ],
       [  76.25281  ,   29.509892 , -112.66555  ],
       [  78.232895 ,   15.295328 ,  -66.82863  ],
       [  79.56887  ,   11.010688 ,  -65.77906  ],
       [  77.75118  ,   -6.290383 ,   46.347797 ],
       [  68.63948  ,  -20.181547 ,   -7.7682605],
       [  66.616005 ,    8.191802 ,   47.20376  ],
       [  71.10089  ,  -25.57422  ,    5.3050184],
       [  76.844536 ,   37.11273  ,    8.99971  ],
       [  84.15928  ,   28.642666 ,    8.276217 ],
       [  79.79157  ,   -3.420547 ,  -19.465885 ],
       [  73.504234 ,   -6.3011093,   30.517845 ],
       [  74.26517  ,    0.1985243,   15.838296 ],
       [  78.60993  ,   25.741186 ,   49.28199  ],
       [  78.1025   ,   38.754932 ,  -17.859613 ],
       [  80.5224   ,  111.56327  ,   24.594753 ]], dtype='float32')



In [222]:
customLSTM_today_maxcpu = customLSTM_today[:,0].copy()

In [223]:
customLSTM_today_maxcpu

array([76.56823 , 76.71233 , 82.47301 , 85.206856, 87.00391 , 81.321815,
       71.95327 , 68.79478 , 71.26939 , 76.25281 , 78.232895, 79.56887 ,
       77.75118 , 68.63948 , 66.616005, 71.10089 , 76.844536, 84.15928 ,
       79.79157 , 73.504234, 74.26517 , 78.60993 , 78.1025  , 80.5224  ],
      dtype=float32)

In [224]:
# sort the array values to find the percentile values.
customLSTM_today_maxcpu_sorted = np.sort(customLSTM_today_maxcpu)

In [225]:
customLSTM_today_maxcpu_sorted

array([66.616005, 68.63948 , 68.79478 , 71.10089 , 71.26939 , 71.95327 ,
       73.504234, 74.26517 , 76.25281 , 76.56823 , 76.71233 , 76.844536,
       77.75118 , 78.1025  , 78.232895, 78.60993 , 79.56887 , 79.79157 ,
       80.5224  , 81.321815, 82.47301 , 84.15928 , 85.206856, 87.00391 ],
      dtype=float32)

In [226]:
# Helper used to locate each percentile in today's sorted LSTM output.
# It repeats the two-argument definition from the yesterday section, so this
# part of the notebook carries its own copy of the helper.
def calculate_percentile_index(p,arr):
  """Return the p-th percentile of arr and the index where it sits.

  Args:
    p: percentile to compute, in the range 0-100.
    arr: array to analyse; callers in this notebook pass sorted arrays.

  Returns:
    (pcen, index_near): the percentile chosen with the 'nearest' rule (an
    actual element of arr) and the index of the first element whose distance
    to that value is smallest.
  """
  x = arr
  try:
    pcen = np.percentile(x, p, method='nearest')
  except TypeError:
    # NumPy releases before 1.22 only accept the keyword under its old name.
    pcen = np.percentile(x, p, interpolation='nearest')
  index_near = abs(x - pcen).argmin()
  return pcen, index_near


In [227]:
lstm_t_p25, lstm_t_index25 = calculate_percentile_index(p=25, arr=customLSTM_today_maxcpu_sorted)

In [228]:
print("the value of the 25th percentile ={}".format(lstm_t_p25))
print("the value of the index of 25th percentile ={}".format(lstm_t_index25))

the value of the 25th percentile =73.50423431396484
the value of the index of 25th percentile =6


In [229]:
lstm_t_p50, lstm_t_index50 = calculate_percentile_index(p=50, arr=customLSTM_today_maxcpu_sorted)

In [230]:
print("the value of the 50th percentile ={}".format(lstm_t_p50))
print("the value of the index of 50th percentile ={}".format(lstm_t_index50))

the value of the 50th percentile =77.75118255615234
the value of the index of 50th percentile =12


In [231]:
lstm_t_p75, lstm_t_index75 = calculate_percentile_index(p=75, arr=customLSTM_today_maxcpu_sorted)

In [232]:
print("the value of the 75th percentile ={}".format(lstm_t_p75))
print("the value of the index of 75th percentile ={}".format(lstm_t_index75))

the value of the 75th percentile =79.79157257080078
the value of the index of 75th percentile =17


In [233]:
lstm_t_p100, lstm_t_index100 = calculate_percentile_index(p=100, arr=customLSTM_today_maxcpu_sorted)

In [234]:
print("the value of the 100th percentile ={}".format(lstm_t_p100))
print("the value of the index of 100th percentile ={}".format(lstm_t_index100))

the value of the 100th percentile =87.00390625
the value of the index of 100th percentile =23


In [235]:
# Calculate Mean of the values lying below the 25 percentile.
lstm_yesterday_values_25mean =  np.mean(customLSTM_today_maxcpu_sorted[0:6])

In [236]:
lstm_yesterday_values_25mean

69.728966

In [237]:
# Calculate Mean of the values lying below the 50 percentile.
# lstm_t_index50 replaces the hand-copied slice end (12).
lstm_today_values_50mean = np.mean(customLSTM_today_maxcpu_sorted[0:lstm_t_index50])

In [238]:
lstm_today_values_50mean

72.71009

In [239]:
# Calculate Mean of the values lying below the 75 percentile.
# lstm_t_index75 replaces the hand-copied slice end (17).
lstm_today_values_75mean = np.mean(customLSTM_today_maxcpu_sorted[0:lstm_t_index75])

In [240]:
lstm_today_values_75mean

74.39921

In [241]:
# Calculate Mean of the values lying below the 100 percentile.
# lstm_t_index100 replaces the hand-copied slice end (23). The slice stops
# before that index, so the maximum itself is not part of this mean.
lstm_today_values_100mean = np.mean(customLSTM_today_maxcpu_sorted[0:lstm_t_index100])

In [242]:
lstm_today_values_100mean

76.44616

In [243]:
# Create the today-stats dataframe holding one mean per percentile bucket.
# The four numbers are typed in by hand; they match the values printed by the
# four mean cells above (25%, 50%, 75%, 100%) and must be updated manually if
# those cells produce different results.
lstm_today_stats = pd.DataFrame({'mean_stats':[69.728966,72.71009,74.39921,76.44616],'index':['25%','50%','75%','100%']})

In [244]:
lstm_today_stats = lstm_today_stats.set_index("index")

In [245]:
lstm_today_stats

Unnamed: 0_level_0,mean_stats
index,Unnamed: 1_level_1
25%,69.728966
50%,72.71009
75%,74.39921
100%,76.44616


In [246]:
customLSTM_today_maxcpu_copy = customLSTM_today_maxcpu[-24:].copy()

In [247]:
#customLSTM_today_maxcpu_copy

In [248]:
customLSTM_today_maxcpu_copy_values = customLSTM_today_maxcpu_copy.copy()

In [249]:
customLSTM_today_maxcpu_copy_values

array([76.56823 , 76.71233 , 82.47301 , 85.206856, 87.00391 , 81.321815,
       71.95327 , 68.79478 , 71.26939 , 76.25281 , 78.232895, 79.56887 ,
       77.75118 , 68.63948 , 66.616005, 71.10089 , 76.844536, 84.15928 ,
       79.79157 , 73.504234, 74.26517 , 78.60993 , 78.1025  , 80.5224  ],
      dtype=float32)

In [250]:
# Bucket an array according to the stats table passed in (this section calls
# it with lstm_today_stats). It repeats the two-argument definition from the
# yesterday section.
def transform_input_data(arr, input_stats):
  """Snap every element of arr to one of four bucket levels, in place.

  A value at or below the 25% level becomes the 25% level; otherwise a value
  at or below the 50% level becomes the 50% level; otherwise a value at or
  below the 75% level becomes the 75% level; everything else becomes the
  100% level.

  Args:
    arr: mutable numeric sequence (the notebook passes 1-D NumPy arrays).
      It is overwritten element by element.
    input_stats: DataFrame indexed by "25%", "50%", "75%" and "100%" whose
      first column holds the level for each bucket.

  Returns:
    The same arr object after modification.
  """
  # Look the four levels up once instead of repeating the pandas label lookup
  # on every loop iteration.
  level_25 = input_stats.loc["25%"].values[0]
  level_50 = input_stats.loc["50%"].values[0]
  level_75 = input_stats.loc["75%"].values[0]
  level_100 = input_stats.loc["100%"].values[0]
  for i in range(len(arr)):
    value = arr[i]
    # Each branch is reached only when the previous upper bound was exceeded,
    # so no explicit lower-bound comparison is needed.
    if value <= level_25:
      arr[i] = level_25
    elif value <= level_50:
      arr[i] = level_50
    elif value <= level_75:
      arr[i] = level_75
    else:
      arr[i] = level_100
  return arr


In [251]:
transform_lstm_today_data = transform_input_data(customLSTM_today_maxcpu_copy_values,lstm_today_stats)
#transform_lstm_today_data = transform_input_data(customLSTM_today_maxcpu_copy_values,input_stats)

In [252]:
transform_lstm_today_data

array([76.44616 , 76.44616 , 76.44616 , 76.44616 , 76.44616 , 76.44616 ,
       72.71009 , 69.728966, 72.71009 , 76.44616 , 76.44616 , 76.44616 ,
       76.44616 , 69.728966, 69.728966, 72.71009 , 76.44616 , 76.44616 ,
       76.44616 , 74.39921 , 74.39921 , 76.44616 , 76.44616 , 76.44616 ],
      dtype=float32)

In [268]:
# Manual overrides: at the listed positions today's bucketed prediction is
# replaced by the bucketed input value at the same position plus a fixed offset.
# NOTE(review): the positions and offsets are hand-picked and no rationale is
# visible in this notebook — confirm they are intended before relying on the
# exported predictions.
for override_position, override_offset in ((0, 20), (1, 10), (3, 10), (10, 10), (22, 10)):
  transform_lstm_today_data[override_position] = input_cpu_transformed[override_position] + override_offset

In [271]:
transform_lstm_today_data

array([53.652115, 63.809986, 76.44616 , 63.809986, 76.44616 , 76.44616 ,
       72.71009 , 69.728966, 72.71009 , 76.44616 , 63.809986, 76.44616 ,
       76.44616 , 69.728966, 69.728966, 72.71009 , 76.44616 , 76.44616 ,
       76.44616 , 74.39921 , 74.39921 , 76.44616 , 63.809986, 76.44616 ],
      dtype=float32)

In [289]:
from datetime import datetime

# 24 hourly timestamps covering 2020-12-01, one per predicted value. The
# lowercase 'h' alias is used because the uppercase 'H' spelling is deprecated
# in recent pandas releases.
TIMESTAMP = pd.date_range('2020-12-01', periods=24, freq='h')

In [290]:
TIMESTAMP

DatetimeIndex(['2020-12-01 00:00:00', '2020-12-01 01:00:00',
               '2020-12-01 02:00:00', '2020-12-01 03:00:00',
               '2020-12-01 04:00:00', '2020-12-01 05:00:00',
               '2020-12-01 06:00:00', '2020-12-01 07:00:00',
               '2020-12-01 08:00:00', '2020-12-01 09:00:00',
               '2020-12-01 10:00:00', '2020-12-01 11:00:00',
               '2020-12-01 12:00:00', '2020-12-01 13:00:00',
               '2020-12-01 14:00:00', '2020-12-01 15:00:00',
               '2020-12-01 16:00:00', '2020-12-01 17:00:00',
               '2020-12-01 18:00:00', '2020-12-01 19:00:00',
               '2020-12-01 20:00:00', '2020-12-01 21:00:00',
               '2020-12-01 22:00:00', '2020-12-01 23:00:00'],
              dtype='datetime64[ns]', freq='H')

In [291]:
# One row per entry of TIMESTAMP, paired positionally with the adjusted LSTM values.
# NOTE(review): the column is labelled "avgcpu" while the source array's name mentions
# "maxcpu" — confirm which metric this really is.
transformed_lstm_prediction_df=pd.DataFrame({"timestamp":TIMESTAMP,"avgcpu":transform_lstm_today_data})

In [292]:
transformed_lstm_prediction_df

Unnamed: 0,timestamp,avgcpu
0,2020-12-01 00:00:00,53.652115
1,2020-12-01 01:00:00,63.809986
2,2020-12-01 02:00:00,76.446159
3,2020-12-01 03:00:00,63.809986
4,2020-12-01 04:00:00,76.446159
5,2020-12-01 05:00:00,76.446159
6,2020-12-01 06:00:00,72.710091
7,2020-12-01 07:00:00,69.728966
8,2020-12-01 08:00:00,72.710091
9,2020-12-01 09:00:00,76.446159


In [294]:
# Write the frame to the current working directory. No index=False is passed, so the
# row index is written too, as an unnamed first column.
transformed_lstm_prediction_df.to_csv('customLSTM_predictions.csv')

In [269]:
from IPython.display import display, HTML

# Page skeleton used to embed a rendered pygal SVG chart in the cell output.
# {rendered_chart} is the only str.format placeholder.
# Both helper scripts are loaded over https so they are not blocked as mixed content
# when the notebook is served over https, and each src attribute is closed by exactly
# one double quote (a doubled quote makes the <script> tag malformed).
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

In [270]:
# Bar chart with two series: the transformed input and the adjusted LSTM values.
# x labels are the string form of each entry in input_cpu["timestamp"].
# (`pg` is presumably the pygal module imported under an alias elsewhere — confirm.)
TIME = input_cpu["timestamp"].copy()
bar_chart_t = pg.Bar()
bar_chart_t.title = 'CPUUtilization prediction'
bar_chart_t.x_labels = map(str, TIME)
bar_chart_t.add('INPUT',input_cpu_transformed)
bar_chart_t.add('customLSTMtoday',transform_lstm_today_data)
# The rendered chart is substituted into the {rendered_chart} slot of base_html and
# shown as HTML in the cell output.
display(HTML(base_html.format(rendered_chart=bar_chart_t.render(is_unicode=True))))

In [295]:
# DeepAR predictions, pasted in as literals: 24 values in time order.
# They are paired with the 24 hourly entries of TIMESTAMP further down.
deepar_predlist_today = np.array(
    [
        88.37545776367188, 81.91768646240234, 84.79779052734375, 88.49874877929688,
        79.69515991210938, 87.70539855957031, 80.64637756347656, 83.07598114013672,
        93.94161987304688, 94.61588287353516, 87.73178100585938, 87.67756652832031,
        92.31980895996094, 87.55823516845703, 92.96176147460938, 89.62384796142578,
        80.35492706298828, 94.0986557006836, 87.02137756347656, 83.88580322265625,
        86.17538452148438, 80.42296600341797, 84.56570434570312, 92.90814208984375,
    ],
    dtype=float,
)

In [296]:
# Independent copy of the pasted predictions; the steps below work on this copy
# (and further copies of it) rather than on deepar_predlist_today itself.
deepar_predlist_today_values = deepar_predlist_today.copy()

In [297]:
# Ascending sorted copy of the predictions (np.sort leaves its input unchanged).
# The percentile positions computed below are positions in this sorted array.
deepar_predlist_today_values_sorted = np.sort(deepar_predlist_today_values)

In [298]:
deepar_predlist_today_values_sorted

array([79.69515991, 80.35492706, 80.422966  , 80.64637756, 81.91768646,
       83.07598114, 83.88580322, 84.56570435, 84.79779053, 86.17538452,
       87.02137756, 87.55823517, 87.67756653, 87.70539856, 87.73178101,
       88.37545776, 88.49874878, 89.62384796, 92.31980896, 92.90814209,
       92.96176147, 93.94161987, 94.0986557 , 94.61588287])

In [None]:
# Nearest-rank percentile of an array together with the position of that value.
def calculate_percentile_index(p,arr):
  """Return the nearest-rank ``p``-th percentile of ``arr`` and where it sits in ``arr``.

  Args:
    p: Percentile to compute, in the range 0-100.
    arr: Array-like of numbers (NumPy array, list, ...). It is not modified.

  Returns:
    A tuple ``(pcen, index_near)``: ``pcen`` is the percentile value, always an
    actual element of ``arr`` because the "nearest" rule is used; ``index_near`` is
    the position of the first element of ``arr`` closest to ``pcen`` (a position in
    the flattened array if ``arr`` has more than one dimension).
  """
  x = np.asarray(arr)  # lets plain lists work with the subtraction below
  try:
    # NumPy >= 1.22 names the selection rule ``method=``.
    pcen = np.percentile(x, p, method='nearest')
  except TypeError:
    # Older NumPy only accepts the (now deprecated) ``interpolation=`` keyword.
    pcen = np.percentile(x, p, interpolation='nearest')
  index_near = np.abs(x - pcen).argmin()
  return pcen, index_near


In [299]:
# Nearest-rank 25th percentile of the sorted predictions, plus its position in that array.
deepar_t_p25, deepar_t_index25 = calculate_percentile_index(p=25, arr=deepar_predlist_today_values_sorted)

In [300]:
deepar_t_p25

83.88580322265625

In [301]:
deepar_t_index25

6

In [302]:
# Nearest-rank 50th percentile of the sorted predictions, plus its position in that array.
deepar_t_p50, deepar_t_index50 = calculate_percentile_index(p=50, arr=deepar_predlist_today_values_sorted)

In [303]:
deepar_t_p50

87.67756652832031

In [304]:
deepar_t_index50

12

In [305]:
# Nearest-rank 75th percentile of the sorted predictions, plus its position in that array.
deepar_t_p75, deepar_t_index75 = calculate_percentile_index(p=75, arr=deepar_predlist_today_values_sorted)

In [306]:
deepar_t_p75


89.62384796142578

In [307]:
deepar_t_index75

17

In [308]:
# 100th percentile (the maximum) of the sorted predictions, plus its position in that array.
deepar_t_p100, deepar_t_index100 = calculate_percentile_index(p=100, arr=deepar_predlist_today_values_sorted)

In [309]:
deepar_t_p100

94.61588287353516

In [310]:
deepar_t_index100

23

In [311]:
# Mean of the sorted predictions that sit before the 25th-percentile element
# (positions 0 .. deepar_t_index25 - 1). The slice end is the computed position rather
# than a number copied from a printed output, so it follows the data if it changes.
# NOTE(review): the name says "yesterday" but the input is the today array — confirm naming.
deepar_yesterday_values_25mean =  np.mean(deepar_predlist_today_values_sorted[:deepar_t_index25])

In [312]:
deepar_yesterday_values_25mean

81.0188496907552

In [313]:
# Mean of the sorted predictions that sit before the 50th-percentile element
# (positions 0 .. deepar_t_index50 - 1). The slice starts at 0, so this is a cumulative
# mean that also includes the values below the 25th percentile.
# The slice end is the computed position rather than a number copied from an output.
deepar_yesterday_values_50mean =  np.mean(deepar_predlist_today_values_sorted[:deepar_t_index50])

In [314]:
deepar_yesterday_values_50mean

83.34311612447102

In [315]:
# Mean of the sorted predictions that sit before the 75th-percentile element
# (positions 0 .. deepar_t_index75 - 1). The slice starts at 0, so this is a cumulative
# mean over everything below that element.
# The slice end is the computed position rather than a number copied from an output.
deepar_yesterday_values_75mean =  np.mean(deepar_predlist_today_values_sorted[:deepar_t_index75])

In [316]:
deepar_yesterday_values_75mean

84.71213800766888

In [317]:
# Mean of the sorted predictions that sit before the 100th-percentile element
# (positions 0 .. deepar_t_index100 - 1).
# The slice end is the computed position rather than a number copied from an output.
# NOTE(review): because the slice stops before deepar_t_index100, the maximum value
# itself is left out of this mean — confirm that is intended.
deepar_yesterday_values_100mean =  np.mean(deepar_predlist_today_values_sorted[:deepar_t_index100])

In [318]:
deepar_yesterday_values_100mean

86.78087748651919

In [319]:
# Collect the four cumulative means into one frame. The values are taken from the
# variables computed above instead of numbers pasted from their printed outputs, so the
# frame cannot go stale. The "index" column holds the level labels; it is turned into
# the row index in a later cell.
deepar_today_stats = pd.DataFrame({
    'mean_stats': [
        deepar_yesterday_values_25mean,
        deepar_yesterday_values_50mean,
        deepar_yesterday_values_75mean,
        deepar_yesterday_values_100mean,
    ],
    'index': ['25%', '50%', '75%', '100%'],
})

In [320]:
deepar_today_stats

Unnamed: 0,mean_stats,index
0,81.01885,25%
1,83.343116,50%
2,84.712138,75%
3,86.780877,100%


In [321]:
# Promote the "index" label column to the row index so levels can be read with .loc["25%"].
# Guarded so re-running the cell is harmless: after the first run "index" is no longer
# a column, and an unguarded set_index("index") would raise KeyError.
if "index" in deepar_today_stats.columns:
  deepar_today_stats = deepar_today_stats.set_index("index")

In [322]:
deepar_today_stats

Unnamed: 0_level_0,mean_stats
index,Unnamed: 1_level_1
25%,81.01885
50%,83.343116
75%,84.712138
100%,86.780877


In [325]:
# Another independent copy of the unsorted predictions, kept separate from the array
# that was used for the percentile calculations.
deepar_predlist_today_values_copy = deepar_predlist_today_values.copy()

In [330]:
# Throwaway copy handed to transform_input_data, which overwrites its argument in place.
deepar_predlist_today_values_secondcopy = deepar_predlist_today_values_copy.copy()

In [331]:
# Quantize a series onto the four levels held in a stats frame (25% / 50% / 75% / 100%).
# NOTE: this re-declares transform_input_data, which is already defined earlier in the
# notebook with the same behaviour; whichever cell ran last is the one in effect.
def transform_input_data(arr, input_stats):
  """Quantize ``arr`` in place onto the four levels stored in ``input_stats``.

  Each element is replaced by the first level it does not exceed, checked in the
  order "25%", "50%", "75%". Anything above the "75%" level (and NaN, which fails
  every ``<=`` test) becomes the "100%" level. The comparisons assume the levels
  are in ascending order.

  Args:
    arr: Mutable, integer-indexable sequence of numbers (e.g. a 1-D NumPy array).
      It is overwritten element by element.
    input_stats: pandas object whose ``.loc`` accepts the row labels "25%", "50%",
      "75%" and "100%"; the first value of each row is used as that level.

  Returns:
    The same ``arr`` object that was passed in, now holding only level values.
    Pass a copy if the original values are still needed.

  Raises:
    KeyError: if one of the four row labels is missing from ``input_stats``.
  """
  # Read each level once up front instead of doing four .loc lookups per element.
  level_25 = input_stats.loc["25%"].values[0]
  level_50 = input_stats.loc["50%"].values[0]
  level_75 = input_stats.loc["75%"].values[0]
  level_100 = input_stats.loc["100%"].values[0]
  for i in range(len(arr)):
    value = arr[i]
    # Branches are tried in ascending order, so no explicit lower bound is needed.
    if value <= level_25:
      arr[i] = level_25
    elif value <= level_50:
      arr[i] = level_50
    elif value <= level_75:
      arr[i] = level_75
    else:
      arr[i] = level_100
  return arr


In [332]:
# Quantize the DeepAR predictions onto the levels in deepar_today_stats.
# transform_input_data writes into its first argument and returns that same object, so
# transform_deep_today_data and deepar_predlist_today_values_secondcopy are one array.
transform_deep_today_data = transform_input_data(deepar_predlist_today_values_secondcopy,deepar_today_stats)

In [333]:
transform_deep_today_data

array([86.78087749, 83.34311612, 86.78087749, 86.78087749, 81.01884969,
       86.78087749, 81.01884969, 83.34311612, 86.78087749, 86.78087749,
       86.78087749, 86.78087749, 86.78087749, 86.78087749, 86.78087749,
       86.78087749, 81.01884969, 86.78087749, 86.78087749, 84.71213801,
       86.78087749, 81.01884969, 84.71213801, 86.78087749])

In [337]:
# One row per entry of TIMESTAMP, paired positionally with the quantized DeepAR values
# (stored under the column name "avgcpu").
transformed_deepAR_prediction_df=pd.DataFrame({"timestamp":TIMESTAMP,"avgcpu":transform_deep_today_data})

In [338]:
transformed_deepAR_prediction_df

Unnamed: 0,timestamp,avgcpu
0,2020-12-01 00:00:00,86.780877
1,2020-12-01 01:00:00,83.343116
2,2020-12-01 02:00:00,86.780877
3,2020-12-01 03:00:00,86.780877
4,2020-12-01 04:00:00,81.01885
5,2020-12-01 05:00:00,86.780877
6,2020-12-01 06:00:00,81.01885
7,2020-12-01 07:00:00,83.343116
8,2020-12-01 08:00:00,86.780877
9,2020-12-01 09:00:00,86.780877


In [340]:
# Write the frame to the current working directory. No index=False is passed, so the
# row index is written too, as an unnamed first column.
transformed_deepAR_prediction_df.to_csv("deepAR_predictions.csv")

In [335]:
from IPython.display import display, HTML

# Page skeleton used to embed a rendered pygal SVG chart in the cell output.
# {rendered_chart} is the only str.format placeholder.
# Both helper scripts are loaded over https so they are not blocked as mixed content
# when the notebook is served over https, and each src attribute is closed by exactly
# one double quote (a doubled quote makes the <script> tag malformed).
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

In [336]:
# Bar chart with two series: the transformed input and the quantized DeepAR values.
# x labels are the string form of each entry in input_cpu["timestamp"].
# (`pg` is presumably the pygal module imported under an alias elsewhere — confirm.)
TIME = input_cpu["timestamp"].copy()
bar_chart_d = pg.Bar()
bar_chart_d.title = 'CPUUtilization prediction'
bar_chart_d.x_labels = map(str, TIME)
bar_chart_d.add('INPUT',input_cpu_transformed)
bar_chart_d.add('DeepARtoday',transform_deep_today_data)
# The rendered chart is substituted into the {rendered_chart} slot of base_html and
# shown as HTML in the cell output.
display(HTML(base_html.format(rendered_chart=bar_chart_d.render(is_unicode=True))))