In [16]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import seaborn as sns

from pandas.io.json import json_normalize

In [17]:
#Definitions

today = pd.to_datetime('today').normalize()
current_eom = today + pd.offsets.MonthEnd(0)
start_date = "2017-01-01"
end_date = current_eom + pd.offsets.MonthEnd(11)
studio_debug = "London"
debug = True

def get_json(df):
    """ Small function to serialise DataFrame dates as 'YYYY-MM-DD' in JSON """

    def convert_timestamp(item_date_object):
        if isinstance(item_date_object, (datetime.date, datetime.datetime)):
            return item_date_object.strftime("%Y-%m-%d")
    
    dict_ = df.to_dict(orient="records")

    return json.dumps(dict_, default=convert_timestamp)

#display count and summary of any dataframe
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
pd.set_option('display.precision', 1)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

def get_df_name(df):
    name =[x for x in globals() if globals()[x] is df][0]
    return name

def difflist(li1, li2): 
    return (list(set(li1) - set(li2)))

def addlist(li1, li2): 
    return (li1.append(li2))

def remove_percetage(df, column_list):
    for col in column_list:
        df[col] = round(df[col].str.replace('%','').astype(np.float64) / 100, 4)
    return df

def coerce_df_columns_to_numeric(df):
    cols_float1 = list(df.filter(like='Rate', axis=1).columns)
    cols_float2 = list(df.filter(like='Yield', axis=1).columns)
    cols_float3 = list(df.filter(like='Diff%', axis=1).columns)
    cols_float4 = list(df.filter(like='Relative_Offset', axis=1).columns)
    cols_float5 = list(df.filter(like='sp500', axis=1).columns)
    cols_float6 = list(df.filter(like='Return', axis=1).columns)
    cols_float = cols_float1 + cols_float2 + cols_float3 + cols_float4 + cols_float5 + cols_float6
    #display("cols_float:", cols_float)
    cols_int1 = list(df.filter(like='Revenue', axis=1).columns)
    cols_int2 = list(df.filter(like='Conversions', axis=1).columns)
    cols_int3 = list(df.filter(like='Value', axis=1).columns)
    cols_int4 = list(df.filter(like='Pipeline', axis=1).columns)
    cols_int5 = list(df.filter(like='Offset', axis=1).columns)
    cols_int6 = list(df.filter(like='Headcount', axis=1).columns)
    cols_int = cols_int1 + cols_int2 + cols_int3 + cols_int4 + cols_int5 + cols_int6
    #display("cols_int:", cols_int)
    cols1 = list(df.select_dtypes(include='float64').columns)
    cols = cols1 + cols_int
    #display("cols:", cols)
    final_cols = difflist(cols,cols_float)
    #display("final_cols:", final_cols)
    df[final_cols] = df[final_cols].apply(pd.to_numeric, errors='coerce')
    df[final_cols] = df[final_cols].replace(np.nan, 0, regex=True)
    df[final_cols] = df[final_cols].astype(int)
    df[final_cols] = round(df[final_cols],0)
    #return df

def data_prep(df):
    df.columns = df.columns.astype(str).str.replace(" ", "_")
    if 'End_of_Month' in df.columns:
        df['End_of_Month'] = pd.to_datetime(df['End_of_Month']) #Format Date
    if 'Snapshot_Date_Short' in df.columns:
        df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short']) #Format Date
    df = df.query('Studio not in @exclude_studio')
    df = df.replace(np.nan, 0, regex=True)
    return df

def show_stats(df):   
    print("\n DF Name: \n")
    display(get_df_name(df))
    print("\n DF Info: \n")
    display(df.info(verbose=True))
    print("\n DF Describe: \n")
    display(df.describe(include='all').transpose().head())
    print("\n DF Head: \n")
    display(df.head())
    print("\n DF Tail: \n")
    display(df.tail())
    group_by_studio = df.groupby(by=['Studio'], as_index=False)
    studio_sum = group_by_studio.sum().reset_index(drop=True)
    studio_count = group_by_studio.count().reset_index(drop=True)
    print("\n Studio Sum: \n")
    display(studio_sum.head())
    print("\n Studio Count: \n")
    display(studio_count.head())
    if 'End_of_Month' in df.columns:
        df['End_of_Month'] = pd.to_datetime(df['End_of_Month']) #Format Date
        group_by_eom = df.groupby(by=['End_of_Month'], as_index=False)
        eom_sum = group_by_eom.sum().reset_index(drop=True)
        eom_count = group_by_eom.count().reset_index(drop=True)
        print("\n EOM Sum:")
        display(eom_sum.head())
        print("\n EOM Count: \n")
        display(eom_count.head())
    if 'Snapshot_Date_Short' in df.columns:
        df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short']) #Format Date
        group_by_sds = df.groupby(by=['Snapshot_Date_Short'], as_index=False)
        sds_sum = group_by_sds.sum().reset_index(drop=True)
        sds_count = group_by_sds.count().reset_index(drop=True)
        print("\n SDS Sum:")
        display(sds_sum.head())
        print("\n SDS Count: \n")
        display(sds_count.head())
    return

In [18]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = 'db61fd47-db56-45e3-844f-1b1f5c47990a'
resource_group = 'BI-DevQA-RG'
workspace_name = 'dwmlazwu01'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='revpipe_final')

dataset.to_pandas_dataframe()
df = dataset.to_pandas_dataframe()
df.columns = df.columns.astype(str).str.replace(" ", "_")
#df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short'])
df = df.replace(np.nan, 0, regex=True)
show_stats(df)
#df.columns.name=None
df = df.reset_index(drop = True)

#df['End_of_Month'] = pd.to_datetime(df['End_of_Month'])
print("df :")
display(df.head())


 DF Name: 



'df'


 DF Info: 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2932 entries, 0 to 2931
Data columns (total 13 columns):
Snapshot_Date_Short               2932 non-null datetime64[ns]
Relative_Snapshot_Month_Offset    2932 non-null int64
Studio                            2932 non-null object
Pipeline                          2932 non-null int64
Active_Unrecognized_Revenue       2932 non-null int64
Opportunity_Revenue               2932 non-null int64
Pipeline_3_Month_Rolling_Avg      2932 non-null int64
Pipeline_3_Month_Rolling_Sum      2932 non-null int64
Pipeline_MoM                      2932 non-null int64
Pipeline_3M_Forward_Sum           2932 non-null int64
Pipeline_3M_Forward_Avg           2932 non-null int64
Relative_Offset                   2932 non-null object
Revenue                           2932 non-null int64
dtypes: datetime64[ns](1), int64(10), object(2)
memory usage: 297.9+ KB


None


 DF Describe: 



Unnamed: 0,count,unique,top,freq,first,last,mean,std,min,25%,50%,75%,max
Snapshot_Date_Short,2932.0,26.0,2020-01-31 00:00:00,119.0,2018-07-31 00:00:00,2020-08-31 00:00:00,,,,,,,
Relative_Snapshot_Month_Offset,2932.0,,,,,,-12.17,7.42,-25.0,-18.0,-12.0,-6.0,0.0
Studio,2932.0,17.0,Super,182.0,,,,,,,,,
Pipeline,2932.0,,,,,,609340.77,594763.42,0.0,161983.0,461497.0,840641.25,4054734.0
Active_Unrecognized_Revenue,2932.0,,,,,,331248.72,452631.43,0.0,25610.25,142375.0,490018.0,3225198.0



 DF Head: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue
0,2018-07-31,-25,Cambridge,1204028,1190091,13937,1290427,3871282,-122730,4598863,1532954,Relative-00,1167320
1,2018-07-31,-25,Chicago,1727025,1727025,0,1925101,5775305,-196920,6254188,2084729,Relative-00,1471901
2,2018-07-31,-25,D4C,1449893,1449893,0,1450284,4350854,-43844,4649230,1549743,Relative-00,1355137
3,2018-07-31,-25,D4L,508838,508838,0,573807,1721423,-96082,1906705,635568,Relative-00,526425
4,2018-07-31,-25,Food,454369,454369,0,656938,1970816,-271108,1520891,506963,Relative-00,446128



 DF Tail: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue
2927,2020-08-31,0,OpenIDEO,240655,206663,33992,276023,828071,-125899,804562,268187,Relative-06,0
2928,2020-08-31,0,SF OS,22873,22873,0,34783,104350,-1830,71867,35933,Relative-06,0
2929,2020-08-31,0,Shanghai,218382,85482,132900,263579,790737,17566,465409,155136,Relative-06,0
2930,2020-08-31,0,Super,228157,0,228157,609251,1827755,-611877,616321,205440,Relative-06,0
2931,2020-08-31,0,Tokyo,198632,0,198632,234794,704383,-44005,365010,182505,Relative-06,0



 Studio Sum: 



Unnamed: 0,Studio,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Revenue
0,Cambridge,-2275,170285058,90312400,79929882,194583035,583749267,-25838256,423999405,141333084,221510902
1,Chicago,-2275,240284802,143723449,96430116,271698684,814915067,-33389088,602520941,201176803,317464980
2,Creative Leadership,-1088,3726387,2954354,771624,4445245,12670653,-573635,8516914,3198713,3974185
3,D4C,-2275,161672923,96398797,65355076,182522106,547566530,-21665383,411871859,137357669,195535159
4,D4L,-2275,80525386,39136289,41389032,93093741,279196767,-13050856,198086772,66340673,111042815



 Studio Count: 



Unnamed: 0,Studio,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue
0,Cambridge,182,182,182,182,182,182,182,182,182,182,182,182
1,Chicago,182,182,182,182,182,182,182,182,182,182,182,182
2,Creative Leadership,125,125,125,125,125,125,125,125,125,125,125,125
3,D4C,182,182,182,182,182,182,182,182,182,182,182,182
4,D4L,182,182,182,182,182,182,182,182,182,182,182,182



 SDS Sum:


Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Revenue
0,2018-07-31,-2600,62541094,34196036,28345026,72224380,215995127,-9980367,155166892,52147169,80362677
1,2018-08-31,-2520,64521880,32984599,31537249,75103499,224446761,-10534174,158645026,53521459,80601571
2,2018-09-30,-2415,64923311,34492742,30405756,75375422,224358331,-10481405,156083062,53149155,81400244
3,2018-10-31,-2266,53202898,33955056,19220561,63993180,190249111,-10406662,122332825,41472066,82252270
4,2018-11-30,-2142,58576532,27946435,30597440,68292548,203478648,-10177099,146419099,49659401,82085957



 SDS Count: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue
0,2018-07-31,104,104,104,104,104,104,104,104,104,104,104,104
1,2018-08-31,105,105,105,105,105,105,105,105,105,105,105,105
2,2018-09-30,105,105,105,105,105,105,105,105,105,105,105,105
3,2018-10-31,103,103,103,103,103,103,103,103,103,103,103,103
4,2018-11-30,102,102,102,102,102,102,102,102,102,102,102,102


df :


Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue
0,2018-07-31,-25,Cambridge,1204028,1190091,13937,1290427,3871282,-122730,4598863,1532954,Relative-00,1167320
1,2018-07-31,-25,Chicago,1727025,1727025,0,1925101,5775305,-196920,6254188,2084729,Relative-00,1471901
2,2018-07-31,-25,D4C,1449893,1449893,0,1450284,4350854,-43844,4649230,1549743,Relative-00,1355137
3,2018-07-31,-25,D4L,508838,508838,0,573807,1721423,-96082,1906705,635568,Relative-00,526425
4,2018-07-31,-25,Food,454369,454369,0,656938,1970816,-271108,1520891,506963,Relative-00,446128


In [19]:
#pivot
df_pivot = df.pivot_table(index=['Snapshot_Date_Short','Relative_Offset'], columns='Studio', values='Revenue', aggfunc=np.sum, margins=False)
#df3_pivot1['EOM1'] = pd.to_datetime(df3_pivot1['End_of_Month']).dt.date.astype(str)
#df_pivot.columns.name=None
#df_pivot = df_pivot.reset_index()
df_pivot = df_pivot.reset_index(level=df_pivot.index.names)
df_pivot

Studio,Snapshot_Date_Short,Relative_Offset,Cambridge,Chicago,Creative Leadership,D4C,D4L,Food,Global,Health,London,Munich,New York,OpenIDEO,SF OS,Shanghai,Super,Teachers Guild,Tokyo
0,2018-07-31,Relative-00,1167320.00,1471901.00,,1355137.00,526425.00,446128.00,489498.00,,1078885.00,565461.00,892148.00,268061.00,212325.00,732704.00,1344107.00,28845.00,646432.00
1,2018-07-31,Relative-01,1615715.00,2311118.00,,1126195.00,640650.00,685434.00,143473.00,,911414.00,430848.00,1009904.00,214658.00,119014.00,527301.00,1372663.00,49610.00,725772.00
2,2018-07-31,Relative-02,1223964.00,2002735.00,,998130.00,791091.00,518134.00,112534.00,,783017.00,715636.00,1046940.00,94298.00,224575.00,747603.00,1176530.00,210043.00,665625.00
3,2018-07-31,Relative-03,1474613.00,1877350.00,,1139782.00,756216.00,1070926.00,0.00,,975430.00,771263.00,1201195.00,224718.00,313787.00,603605.00,1722958.00,118269.00,748174.00
4,2018-07-31,Relative-04,1219103.00,1494602.00,,1226198.00,538323.00,609125.00,410265.00,,776039.00,793417.00,986323.00,106060.00,162300.00,1051509.00,1341427.00,66769.00,711594.00
5,2018-07-31,Relative-05,931666.00,1880015.00,,842861.00,628613.00,513832.00,7041.00,,773985.00,472400.00,878431.00,181320.00,329977.00,743223.00,1190392.00,29325.00,647591.00
6,2018-07-31,Relative-06,1252541.00,2251215.00,,732754.00,673082.00,625342.00,,,818923.00,506888.00,699725.00,228603.00,166996.00,1237242.00,1338196.00,110174.00,758983.00
7,2018-08-31,Relative-00,1615715.00,2311118.00,,1126195.00,640650.00,685434.00,143473.00,,911414.00,430848.00,1009904.00,214658.00,119014.00,527301.00,1372663.00,49610.00,725772.00
8,2018-08-31,Relative-01,1223964.00,2002735.00,,998130.00,791091.00,518134.00,112534.00,,783017.00,715636.00,1046940.00,94298.00,224575.00,747603.00,1176530.00,210043.00,665625.00
9,2018-08-31,Relative-02,1474613.00,1877350.00,,1139782.00,756216.00,1070926.00,0.00,,975430.00,771263.00,1201195.00,224718.00,313787.00,603605.00,1722958.00,118269.00,748174.00


In [20]:
#Limit for forecast horizon
revpipe = df.copy()
revpipe.drop(columns=['Revenue'], axis=1, inplace = True)
forecast_horizon = 6 #months
#today = datetime.date.today()
#today_plus_month_offset = today + pd.offsets.MonthEnd(6)
#lt_plus_month_offset = pd.to_datetime(df1['End of Month']) <= today_plus_month_offset
#forecast_horizon1 = revpipe['Relative_Month_Offset'] < forecast_horizon
#revpipe = revpipe.query('Relative_Month_Offset < @forecast_horizon')
show_stats(revpipe)


 DF Name: 



'revpipe'


 DF Info: 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2932 entries, 0 to 2931
Data columns (total 12 columns):
Snapshot_Date_Short               2932 non-null datetime64[ns]
Relative_Snapshot_Month_Offset    2932 non-null int64
Studio                            2932 non-null object
Pipeline                          2932 non-null int64
Active_Unrecognized_Revenue       2932 non-null int64
Opportunity_Revenue               2932 non-null int64
Pipeline_3_Month_Rolling_Avg      2932 non-null int64
Pipeline_3_Month_Rolling_Sum      2932 non-null int64
Pipeline_MoM                      2932 non-null int64
Pipeline_3M_Forward_Sum           2932 non-null int64
Pipeline_3M_Forward_Avg           2932 non-null int64
Relative_Offset                   2932 non-null object
dtypes: datetime64[ns](1), int64(9), object(2)
memory usage: 275.0+ KB


None


 DF Describe: 



Unnamed: 0,count,unique,top,freq,first,last,mean,std,min,25%,50%,75%,max
Snapshot_Date_Short,2932.0,26.0,2020-01-31 00:00:00,119.0,2018-07-31 00:00:00,2020-08-31 00:00:00,,,,,,,
Relative_Snapshot_Month_Offset,2932.0,,,,,,-12.17,7.42,-25.0,-18.0,-12.0,-6.0,0.0
Studio,2932.0,17.0,Super,182.0,,,,,,,,,
Pipeline,2932.0,,,,,,609340.77,594763.42,0.0,161983.0,461497.0,840641.25,4054734.0
Active_Unrecognized_Revenue,2932.0,,,,,,331248.72,452631.43,0.0,25610.25,142375.0,490018.0,3225198.0



 DF Head: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset
0,2018-07-31,-25,Cambridge,1204028,1190091,13937,1290427,3871282,-122730,4598863,1532954,Relative-00
1,2018-07-31,-25,Chicago,1727025,1727025,0,1925101,5775305,-196920,6254188,2084729,Relative-00
2,2018-07-31,-25,D4C,1449893,1449893,0,1450284,4350854,-43844,4649230,1549743,Relative-00
3,2018-07-31,-25,D4L,508838,508838,0,573807,1721423,-96082,1906705,635568,Relative-00
4,2018-07-31,-25,Food,454369,454369,0,656938,1970816,-271108,1520891,506963,Relative-00



 DF Tail: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset
2927,2020-08-31,0,OpenIDEO,240655,206663,33992,276023,828071,-125899,804562,268187,Relative-06
2928,2020-08-31,0,SF OS,22873,22873,0,34783,104350,-1830,71867,35933,Relative-06
2929,2020-08-31,0,Shanghai,218382,85482,132900,263579,790737,17566,465409,155136,Relative-06
2930,2020-08-31,0,Super,228157,0,228157,609251,1827755,-611877,616321,205440,Relative-06
2931,2020-08-31,0,Tokyo,198632,0,198632,234794,704383,-44005,365010,182505,Relative-06



 Studio Sum: 



Unnamed: 0,Studio,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg
0,Cambridge,-2275,170285058,90312400,79929882,194583035,583749267,-25838256,423999405,141333084
1,Chicago,-2275,240284802,143723449,96430116,271698684,814915067,-33389088,602520941,201176803
2,Creative Leadership,-1088,3726387,2954354,771624,4445245,12670653,-573635,8516914,3198713
3,D4C,-2275,161672923,96398797,65355076,182522106,547566530,-21665383,411871859,137357669
4,D4L,-2275,80525386,39136289,41389032,93093741,279196767,-13050856,198086772,66340673



 Studio Count: 



Unnamed: 0,Studio,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset
0,Cambridge,182,182,182,182,182,182,182,182,182,182,182
1,Chicago,182,182,182,182,182,182,182,182,182,182,182
2,Creative Leadership,125,125,125,125,125,125,125,125,125,125,125
3,D4C,182,182,182,182,182,182,182,182,182,182,182
4,D4L,182,182,182,182,182,182,182,182,182,182,182



 SDS Sum:


Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg
0,2018-07-31,-2600,62541094,34196036,28345026,72224380,215995127,-9980367,155166892,52147169
1,2018-08-31,-2520,64521880,32984599,31537249,75103499,224446761,-10534174,158645026,53521459
2,2018-09-30,-2415,64923311,34492742,30405756,75375422,224358331,-10481405,156083062,53149155
3,2018-10-31,-2266,53202898,33955056,19220561,63993180,190249111,-10406662,122332825,41472066
4,2018-11-30,-2142,58576532,27946435,30597440,68292548,203478648,-10177099,146419099,49659401



 SDS Count: 



Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset
0,2018-07-31,104,104,104,104,104,104,104,104,104,104,104
1,2018-08-31,105,105,105,105,105,105,105,105,105,105,105
2,2018-09-30,105,105,105,105,105,105,105,105,105,105,105
3,2018-10-31,103,103,103,103,103,103,103,103,103,103,103
4,2018-11-30,102,102,102,102,102,102,102,102,102,102,102


In [21]:
import datetime
from dateutil.tz import tzutc

from pandas.io.json import json_normalize

In [22]:
#data = revenueforecastdf.to_json(orient="records")
# Convert to JSON string
import numpy as np

def get_json(df):
    """ Small function to serialise DataFrame dates as 'YYYY-MM-DD' in JSON """

    def convert_timestamp(item_date_object):
        if isinstance(item_date_object, (datetime.date, datetime.datetime)):
            return item_date_object.strftime("%Y-%m-%d")
    
    dict_ = df.to_dict(orient="records")

    return json.dumps(dict_, default=convert_timestamp)

#df1 = df.replace(np.nan, 0, regex=True)

input_data = get_json(revpipe)
#data_loads = json.loads(input_data)
#data_loads['data']
print("Input Data: ",type(input_data))
input_data = "{\"data\" :" + input_data + "}"
input_data[0:2000]

Input Data:  <class 'str'>


'{"data" :[{"Snapshot_Date_Short": "2018-07-31", "Relative_Snapshot_Month_Offset": -25, "Studio": "Cambridge", "Pipeline": 1204028, "Active_Unrecognized_Revenue": 1190091, "Opportunity_Revenue": 13937, "Pipeline_3_Month_Rolling_Avg": 1290427, "Pipeline_3_Month_Rolling_Sum": 3871282, "Pipeline_MoM": -122730, "Pipeline_3M_Forward_Sum": 4598863, "Pipeline_3M_Forward_Avg": 1532954, "Relative_Offset": "Relative-00"}, {"Snapshot_Date_Short": "2018-07-31", "Relative_Snapshot_Month_Offset": -25, "Studio": "Chicago", "Pipeline": 1727025, "Active_Unrecognized_Revenue": 1727025, "Opportunity_Revenue": 0, "Pipeline_3_Month_Rolling_Avg": 1925101, "Pipeline_3_Month_Rolling_Sum": 5775305, "Pipeline_MoM": -196920, "Pipeline_3M_Forward_Sum": 6254188, "Pipeline_3M_Forward_Avg": 2084729, "Relative_Offset": "Relative-00"}, {"Snapshot_Date_Short": "2018-07-31", "Relative_Snapshot_Month_Offset": -25, "Studio": "D4C", "Pipeline": 1449893, "Active_Unrecognized_Revenue": 1449893, "Opportunity_Revenue": 0, "Pip

In [23]:
# URL for the web service
scoring_uri = 'http://668050fb-e3c9-4acf-b373-49693d14b64a.westus2.azurecontainer.io/score'
# If the service is authenticated, set the key or token
key = '81W5YhDau9lduzaWYZz4mW2iQI4i0VAq'

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

In [24]:
# Make the request and display the response
#while True:
#    try:
resp = requests.post(scoring_uri, input_data, headers=headers)
json_data = resp.text
print("Json Data:", type(json_data))
json_data[0:2000]
#   except Exception as e:
#        result = str(e)
#        print(json.dumps({"error": result}))
#df_rest = pd.DataFrame.from_records(resp.json())
#df_rest

Json Data: <class 'str'>


'"{\\"result\\": [1218481.4864761715, 1734880.9850887246, 1218886.7522577099, 555190.2415409753, 542101.9853779908, 179099.4767107419, 1032128.1212043397, 600980.591495416, 881194.0468500144, 245948.45917056766, 225866.0981612376, 777408.135480632, 1367848.847797604, 88012.49485916905, 697341.8760288832, 1490081.3879720722, 2174102.3432478136, 1159813.3127096551, 654849.3498411751, 648282.2461522869, 152351.21574874513, 911461.8039311555, 521008.9785903096, 1006502.3609065828, 156363.969482645, 207958.7475761086, 639904.8712865947, 1405276.890796692, 83637.26879908216, 737923.3532887441, 1320028.8926925203, 1837674.8862170419, 1093640.2254357433, 666867.5242914098, 581617.3630745193, 158325.577452688, 889702.209793509, 576516.6808094555, 997286.4909065483, 142315.9091425446, 211943.0537285238, 690279.5947143676, 1418975.3558708802, 85720.07744406999, 711231.1574078656, 1371801.3496616154, 1948972.3742372796, 1119241.4953127203, 675826.2780246146, 697350.0448594817, 146325.83036052305, 

In [25]:
#Convert response to python dictionary
import datetime
from dateutil.tz import tzutc

r_json = resp.json()
r_json_loads = json.loads(r_json)
type(r_json_loads)
print("Data Dict Type=", type(r_json_loads))
print("Data Dict Keys=",r_json_loads.keys())
data_dict = r_json_loads
#data_dict['forecast']
#for i in data_dict:
#    if isinstance(i, dict):
#        for key, value in i.items():
#            print(key, value)
#    else:
#        print(i)
        
#for key, value in data_dict.items():
#        print(key, ":", value)

Data Dict Type= <class 'dict'>
Data Dict Keys= dict_keys(['result'])


In [26]:
#Convert response to python dictionary
#import datetime
#from dateutil.tz import tzutc

#loaded_json = json.loads(json_data)

#print("Loaded Json=",type(loaded_json))
#norm_json = json_normalize(loaded_json)
#print(norm_json)

#print(json.dumps(loaded_json, indent=4, sort_keys=True))

#data_dict = json.loads(loaded_json)
#print("Data Dict Type=", type(data_dict))
#print("Data Dict Keys=",data_dict.keys())
#data_dict['forecast'][0]
#json_normalize(data_dict)
#json_normalize(data_dict, record_path='index')
#df1 = pd.DataFrame.from_dict(json_normalize(data_dict))
#df1['index']=[df1['index']]
#json_normalize(data_dict['index'])



In [27]:
print("Convert Nested Data Dictornary to flattened Dataframe and datetime to date format")
####Convert Nested Data Dictornary to flattened Dataframe
df1 = pd.DataFrame(data_dict)
df2 = pd.DataFrame()
for col in reversed(df1.columns):
    display(col)
    norm = json_normalize(data_dict, record_path=col).add_prefix(f'{col}.')
    df2= pd.concat([df2, norm], axis=1, sort=False)
    df2.columns = df2.columns.astype(str).str.replace("result.", "")
    df2.columns = df2.columns.astype(str).str.replace(".0", "")
    df2.columns = df2.columns.astype(str).str.replace(" ", "_")

df2['Forecast'] = round(df2['0'],0)
df2.drop(columns=['0'], axis=1, inplace = True)
#show_stats(df2)
df2

Convert Nested Data Dictornary to flattened Dataframe and datetime to date format


'result'

Unnamed: 0,Forecast
0,1218481.00
1,1734881.00
2,1218887.00
3,555190.00
4,542102.00
5,179099.00
6,1032128.00
7,600981.00
8,881194.00
9,245948.00


In [28]:
#Concatenate Forecast RESULTS with revpipe_final dataframe
forecast_final = pd.concat([df, df2], axis=1, sort=False)
#forecast_final['EOM'] = pd.to_datetime(forecast_final['End_of_Month']).dt.date.astype(str)
forecast_final['Diff'] = forecast_final['Forecast'] - forecast_final['Revenue']
forecast_final['Diff%'] = round(forecast_final['Diff'] / forecast_final['Revenue'],4)
forecast_final = forecast_final.reset_index(drop=True)
coerce_df_columns_to_numeric(forecast_final)
print("forecast_final info:")
#show_stats(forecast_final)
forecast_final.to_csv("forecast_final.csv", index=False)
forecast_final.query('Studio==@studio_debug and Relative_Snapshot_Month_Offset == -1')

forecast_final info:


Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue,Forecast,Diff,Diff%
2711,2020-07-31,-1,London,1065454,1065612,0,997046,2991140,166333,3218367,1072789,Relative-00,1073541,1079450,5909,0.01
2728,2020-07-31,-1,London,895847,866014,29833,953474,2860424,-169607,2861634,953878,Relative-01,933688,963710,30022,0.03
2745,2020-07-31,-1,London,1257064,1156943,100121,1072789,3218367,361217,2503119,834373,Relative-02,895704,1056006,160302,0.18
2762,2020-07-31,-1,London,708722,602160,106562,953878,2861634,-548342,1605368,535122,Relative-03,999912,1013041,13129,0.01
2778,2020-07-31,-1,London,537332,484654,52677,834373,2503119,-171390,942088,314029,Relative-04,964775,956788,-7987,-0.01
2794,2020-07-31,-1,London,359313,314110,45203,535122,1605368,-178018,424645,141548,Relative-05,979337,862699,-116638,-0.12
2810,2020-07-31,-1,London,45442,7471,37971,314029,942088,-313871,65332,32666,Relative-06,928680,790801,-137879,-0.15


In [29]:
forecast_final.query('Studio==@studio_debug and Relative_Snapshot_Month_Offset == 0')

Unnamed: 0,Snapshot_Date_Short,Relative_Snapshot_Month_Offset,Studio,Pipeline,Active_Unrecognized_Revenue,Opportunity_Revenue,Pipeline_3_Month_Rolling_Avg,Pipeline_3_Month_Rolling_Sum,Pipeline_MoM,Pipeline_3M_Forward_Sum,Pipeline_3M_Forward_Avg,Relative_Offset,Revenue,Forecast,Diff,Diff%
2826,2020-08-31,0,London,888884,875052,13831,953849,2861547,-184656,2849227,949742,Relative-00,933688,946958,13270,0.01
2843,2020-08-31,0,London,1263943,1189136,74807,1075456,3226369,375059,2502900,834300,Relative-01,895704,1051393,155689,0.17
2860,2020-08-31,0,London,696398,597284,99114,949742,2849227,-567544,1606417,535472,Relative-02,999912,993172,-6740,-0.01
2877,2020-08-31,0,London,542557,483506,59050,834300,2502900,-153841,962000,320666,Relative-03,964775,926983,-37792,-0.04
2893,2020-08-31,0,London,367461,315277,52184,535472,1606417,-175095,461675,153891,Relative-04,979337,852954,-126383,-0.13
2909,2020-08-31,0,London,51981,7636,44344,320666,962000,-315480,116835,38945,Relative-05,928680,759745,-168935,-0.18
2924,2020-08-31,0,London,42233,0,42233,153891,461675,-9748,64854,32427,Relative-06,0,284617,284617,inf


In [30]:
#pivot by Studio, End_of_Month
forecast_final_pivot = forecast_final.pivot_table(index=['Studio','Snapshot_Date_Short'], columns='Relative_Offset', values='Forecast', aggfunc=np.sum, margins=True)
forecast_final_pivot = forecast_final_pivot.reset_index(level=forecast_final_pivot.index.names)
print("forecast_final_pivot info:")
display(forecast_final_pivot)
#revpipe1_pivot.to_csv("revpipe1_pivot.csv", index=False)

forecast_final_pivot info:


Relative_Offset,Studio,Snapshot_Date_Short,Relative-00,Relative-01,Relative-02,Relative-03,Relative-04,Relative-05,Relative-06,All
0,Cambridge,2018-07-31,1218481.00,1490081.00,1320029.00,1371801.00,1164510.00,1043954.00,1260412.00,8869268
1,Cambridge,2018-08-31,1521138.00,1296199.00,1388182.00,1127599.00,1015171.00,1206882.00,1180516.00,8735687
2,Cambridge,2018-09-30,1247662.00,1410508.00,1336379.00,1060939.00,1230411.00,1315271.00,1236434.00,8837604
3,Cambridge,2018-10-31,1458893.00,1349557.00,1046110.00,1211325.00,1220524.00,1286351.00,1133820.00,8706580
4,Cambridge,2018-11-30,1194808.00,995962.00,1329181.00,1399384.00,1361900.00,1328941.00,1405822.00,9015998
5,Cambridge,2018-12-31,1038563.00,1358649.00,1340264.00,1418885.00,1328177.00,1347165.00,1123865.00,8955568
6,Cambridge,2019-01-31,1292616.00,1346546.00,1353199.00,1407546.00,1367150.00,1087182.00,1077914.00,8932153
7,Cambridge,2019-02-28,1386940.00,1463106.00,1442236.00,1453369.00,1087439.00,1081985.00,1190978.00,9106053
8,Cambridge,2019-03-31,1440428.00,1381352.00,1428021.00,1099104.00,1007655.00,1125474.00,1202992.00,8685026
9,Cambridge,2019-04-30,1377122.00,1562063.00,1174128.00,1022049.00,1091798.00,1223098.00,1309116.00,8759374


In [31]:
#pivot by EOM1, Studio
forecast_final_pivot1 = forecast_final.pivot_table(index=['Snapshot_Date_Short','Relative_Offset'], columns='Studio', values='Forecast', aggfunc=np.sum, margins=True)
forecast_final_pivot1 = forecast_final_pivot1.reset_index(level=forecast_final_pivot1.index.names)
forecast_final_pivot1

Studio,Snapshot_Date_Short,Relative_Offset,Cambridge,Chicago,Creative Leadership,D4C,D4L,Food,Global,Health,London,Munich,New York,OpenIDEO,SF OS,Shanghai,Super,Teachers Guild,Tokyo,All
0,2018-07-31 00:00:00,Relative-00,1218481.00,1734881.00,,1218887.00,555190.00,542102.00,179099.00,,1032128.00,600981.00,881194.00,245948.00,225866.00,777408.00,1367849.00,88012.00,697342.00,11365368
1,2018-07-31 00:00:00,Relative-01,1490081.00,2174102.00,,1159813.00,654849.00,648282.00,152351.00,,911462.00,521009.00,1006502.00,156364.00,207959.00,639905.00,1405277.00,83637.00,737923.00,11949516
2,2018-07-31 00:00:00,Relative-02,1320029.00,1837675.00,,1093640.00,666868.00,581617.00,158326.00,,889702.00,576517.00,997286.00,142316.00,211943.00,690280.00,1418975.00,85720.00,711231.00,11382125
3,2018-07-31 00:00:00,Relative-03,1371801.00,1948972.00,,1119241.00,675826.00,697350.00,146326.00,,940868.00,643022.00,1068636.00,200637.00,258163.00,704707.00,1788529.00,101813.00,783383.00,12449274
4,2018-07-31 00:00:00,Relative-04,1164510.00,1592511.00,,1166392.00,617508.00,653851.00,241642.00,,840863.00,671690.00,918972.00,207398.00,202150.00,817753.00,1590333.00,92528.00,735197.00,11513298
5,2018-07-31 00:00:00,Relative-05,1043954.00,1846979.00,,947960.00,645480.00,602833.00,165670.00,,883627.00,618171.00,769785.00,219451.00,245213.00,791407.00,1454573.00,97242.00,707625.00,11039970
6,2018-07-31 00:00:00,Relative-06,1260412.00,1973357.00,,871010.00,637437.00,686028.00,,,884107.00,645337.00,761094.00,258826.00,277506.00,880603.00,1489647.00,149053.00,782543.00,11556960
7,2018-08-31 00:00:00,Relative-00,1521138.00,2190879.00,,1127967.00,651300.00,633108.00,151410.00,,875992.00,519026.00,948579.00,174627.00,202651.00,593766.00,1364940.00,91162.00,748015.00,11794560
8,2018-08-31 00:00:00,Relative-01,1296199.00,1963577.00,,1043776.00,725920.00,605287.00,163322.00,,882280.00,597111.00,993725.00,146492.00,213371.00,669078.00,1411558.00,84651.00,645298.00,11441645
9,2018-08-31 00:00:00,Relative-02,1388182.00,1808391.00,,1127555.00,879514.00,811598.00,101781.00,,1001561.00,740683.00,994996.00,189980.00,235710.00,743882.00,1784919.00,94826.00,770486.00,12674064


In [None]:
****************************************************************************************************
STOP
#Add Revenue History Column
#revpipe_final = pd.read_csv("revpipe_final.csv")
#revpipe_final1 = revpipe_final[['Snapshot_Date_Short','End_of_Month','Relative_Offset','Studio','Revenue']]
#revpipe_final1
#revpipe_forecast1 = pd.merge(revpipe_forecast, revpipe_final, how='left', on=['Snapshot_Date_Short','End_of_Month','Relative_Month_Offset','Studio'])

In [None]:
**************************************************************
#Append History Revenue with Forecast
revexphist2 = pd.read_csv("revexphist1.csv")
append_revforcast = revexphist2.append(df2, ignore_index=True, sort=True)
#pd.merge(combine_df2, revhistorydiff, how='inner', on=['End_of_Month','Studio'])
append_revforcast['EOM1'] = pd.to_datetime(append_revforcast['End_of_Month']).dt.date.astype(str)
append_revforcast['End_of_Month'] = pd.to_datetime(append_revforcast['End_of_Month'])
#append_revforcast['Relative_Month_Offset'] = append_revforcast['End_of_Month'].dt.to_period('M') - pd.to_datetime('today').to_period('M')
append_revforcast['Relative_Month_Offset'] = round((append_revforcast['End_of_Month'] - current_eom)/np.timedelta64(1,'M'),0)
cols = append_revforcast.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(append_revforcast, cols)
append_revforcast = append_revforcast.replace(np.nan, 0, regex=True)
cols = list(append_revforcast.select_dtypes(include='float64').columns)
cols_float1 = append_revforcast.filter(like='Rate', axis=1).columns
cols_float2 = append_revforcast.filter(like='Yield', axis=1).columns
cols_float3 = append_revforcast.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
coerce_df_columns_to_numeric(append_revforcast, cols)
#cols = ['Revenue','forecast','origin','Relative_Month_Offset']
#coerce_df_columns_to_numeric(append_revforcast, cols)
append_revforcast['Revenue_Forecast'] = append_revforcast['Revenue'] + append_revforcast['forecast']
append_revforcast = append_revforcast.sort_values(by=['End_of_Month','Studio']).reset_index(drop=True)
append_revforcast =append_revforcast.replace(np.nan, 0, regex=True)
append_revforcast = append_revforcast.pivot_table(index=['End_of_Month','Relative_Month_Offset','Studio'],values=['Revenue','forecast','Revenue_Forecast'],aggfunc=sum)
append_revforcast = append_revforcast.reset_index(level=append_revforcast.index.names)
#write to file
#append_revforcast.to_csv("append_revforcast.csv", index=False)
display("append_revforcast info:")
show_stats(append_revforcast)
display(append_revforcast.query('Studio==@studio_debug'))

In [None]:
#Merge with Pipeline when month and snapshot month are same
pipehist21 = pd.read_csv("pipetalentrev_final.csv")
pipehist21['End_of_Month'] = pd.to_datetime(pipehist21['End_of_Month'])
cols = list(pipehist21.select_dtypes(include='float64').columns)
cols_float1 = pipehist21.filter(like='Rate', axis=1).columns
cols_float2 = pipehist21.filter(like='Yield', axis=1).columns
cols_float3 = pipehist21.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
#display(pipehist21.info())
coerce_df_columns_to_numeric(pipehist21, cols)
#Merge revenue forecast with pipehistory
forcastrevpipe = pd.merge(pipehist21, append_revforcast, how='left', on=['End_of_Month','Relative_Month_Offset','Studio','Revenue'])
forcastrevpipe = forcastrevpipe.replace(np.nan, 0, regex=True)
forcastrevpipe['End_of_Month'] = pd.to_datetime(forcastrevpipe['End_of_Month'])
forcastrevpipe['EOM1'] = pd.to_datetime(forcastrevpipe['End_of_Month']).dt.date.astype(str)
forcastrevpipe['Relative_Month_Offset'] = round((forcastrevpipe['End_of_Month'] - current_eom)/np.timedelta64(1,'M'),0)
cols = forcastrevpipe.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(forcastrevpipe, cols)
#show_stats(forcastrevpipe)
forcastrevpipe['Revenue_Forecast'] = forcastrevpipe['Revenue'] + forcastrevpipe['forecast']
forcastrevpipe['Diff'] = forcastrevpipe['Revenue_Forecast'] - forcastrevpipe['Pipeline']
forcastrevpipe['Diff%'] = round(forcastrevpipe['Diff'] / forcastrevpipe['Pipeline'],4)
cols = list(forcastrevpipe.select_dtypes(include='float64').columns)
cols_float1 = forcastrevpipe.filter(like='Rate', axis=1).columns
cols_float2 = forcastrevpipe.filter(like='Yield', axis=1).columns
cols_float3 = forcastrevpipe.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
coerce_df_columns_to_numeric(forcastrevpipe, cols)
forcastrevpipe.replace([np.inf, -np.inf], np.nan, inplace=True)
forcastrevpipe = forcastrevpipe.replace(np.nan, 0, regex=True)
display("forcastrevpipe info:")
show_stats(forcastrevpipe)
forcastrevpipe.to_csv("forcastrevpipe.csv")
display(forcastrevpipe.query('Studio==@studio_debug'))

In [None]:
#pipehist21.drop(columns=['Snapshot_Date_Short','Relative_Snapshot_Month_Offset'], axis=1, inplace = True)
#pipe_offset = ['Pipeline-00','Pipeline-01','Pipeline-02','Pipeline-03',
#               'Pipeline-04','Pipeline-05','Pipeline-06','Pipeline-07',
#               'Pipeline-08','Pipeline-09','Pipeline-10','Pipeline-11'
#              ]
#forcastrevpipe1 = forcastrevpipe.query('(Relative_Offset in  @pipe_offset) or (Relative_Snapshot_Month_Offset == 0 and Relative_Month_Offset > 0)').reset_index(drop=True)
forcastrevpipe1 = forcastrevpipe.query('EOM1 >= "2018-07-01"')
forcastrevpipe_pivot1 = forcastrevpipe1.pivot_table(index=['End_of_Month','Relative_Month_Offset','EOM1','Studio'],# columns=[],
                       values=['Revenue','Revenue_Forecast','Pipeline','Diff','Diff%'],
                       aggfunc={'Revenue':np.sum,'Revenue_Forecast':np.sum,'Pipeline':np.sum,'Diff':np.sum,'Diff%':np.mean},
                       margins=False)#.reset_index()

forcastrevpipe_pivot1 = forcastrevpipe_pivot1.reset_index(level=forcastrevpipe_pivot1.index.names)
forcastrevpipe_pivot1 = forcastrevpipe_pivot1.query('Relative_Month_Offset < @forecast_horizon')
cols = forcastrevpipe_pivot1.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(forcastrevpipe_pivot1, cols)
#cols = ['Revenue','forecast','origin','Relative_Month_Offset','Revenue_Forecast','Pipeline','Diff']
forcastrevpipe_pivot1 = forcastrevpipe_pivot1.sort_values(by=['End_of_Month','Studio']).reset_index(drop=True)
#forcastrevpipe['Snapshot_Date_Short'] = pd.to_datetime(forcastrevpipe['Snapshot_Date_Short'])
print("\n forcastrevpipe_pivot1: \n")
#display(forcastrevpipe1.info())
#write to file
forcastrevpipe.to_csv("forcastrevpipe_pivot1.csv", index=False)
display("forcastrevpipe_pivot1 info:")
show_stats(forcastrevpipe_pivot1)
display(forcastrevpipe_pivot1.query('Studio==@studio_debug'))

In [None]:
# Analyze by end of month pipeline - revenue Diff%
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)
sns.lineplot(x= 'Relative_Month_Offset', y= 'Diff%', hue='Studio', #style='Studio',
             data=forcastrevpipe_pivot1,
             palette="Accent", 
             ci=None, marker="o")
#sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
#             data=forcastrevpipe.query('Relative_Month_Offset >= 0'), 
#             #palette=palette, 
#             ci=None, linewidth=4, marker="o")
#xs=forcastrevpipe['Relative_Month_Offset']
#ys=forcastrevpipe['Diff%']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y)# + "K"

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='black',
#                 ha='center') # horizontal alignment can be left, right or center

#sns.lineplot(x= 'End_of_Month', y= 'forecast', #hue='Studio', style='Studio',
#             data=combine_histforcast, color="r", ci=None)
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#ax.legend(['Revenue', 'Revenue forecast'], facecolor='w')
#plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Distribution of Diff% between Revenue - Forecast by Offset, Months", fontsize=20)
plt.show()

In [None]:

forcastrevpipe_pivot2 = forcastrevpipe_pivot1.copy()
forcastrevpipe_pivot2 =forcastrevpipe_pivot2.replace(np.nan, 0, regex=True)
#forcastrevpipe_pivot2 = forcastrevpipe_pivot2.reset_index(level=forcastrevpipe_pivot2.index.names)
forcastrevpipe_pivot2.info()
forcastrevpipe_pivot2

In [None]:
forcastrevpipe_pivot21 = forcastrevpipe_pivot1.query('Relative_Month_Offset >=0').pivot_table(index=['Studio','Relative_Month_Offset'], columns=['EOM1'],
                       values=['Revenue', 'Revenue_Forecast','Pipeline','Diff','Diff%'],
                       aggfunc={'Revenue':np.sum,'Revenue_Forecast':np.sum,'Pipeline':np.sum,'Diff':np.sum,'Diff%':np.mean},
                       margins=False)#.reset_index()
#forcastrevpipe1['Revenue_Forecast'] = round(forcastrevpipe1['Revenue_Forecast']/1000,3)
forcastrevpipe_pivot21 = forcastrevpipe_pivot21.replace(np.nan, 0, regex=True)
forcastrevpipe_pivot21 = forcastrevpipe_pivot21.reset_index(level=forcastrevpipe_pivot21.index.names)
forcastrevpipe_pivot21.info()
forcastrevpipe_pivot21.head()

In [None]:
# multiple line plot
#Set themes
import matplotlib.pyplot as plt
#plot
sns.set()
sns.set_context("poster") #In order of relative size they are: paper, notebook, talk, and poster. Notebook is default
sns.set(style="whitegrid") #Seaborn has five built-in themes to style its plots: darkgrid, whitegrid, dark, white, and ticks. 
# Save a palette to a variable:
palette = sns.color_palette("Accent",8)
# Use palplot and pass in the variable:
#sns.palplot(palette)
# Set the palette using the name of a palette:
#sns.set_palette(palette)

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
plt.xticks(rotation=-45)
#today_eom = today + pd.offsets.MonthEnd(0) 
forcastrevpipe22 = forcastrevpipe_pivot1.groupby(by=['End_of_Month','Relative_Month_Offset','EOM1'], as_index=False)
forcastrevpipe22 = forcastrevpipe22.sum().reset_index(drop=True)
#forcastrevpipe22 = forcastrevpipe22.query('EOM1 >= "2018-07-01"')
display(forcastrevpipe22)
div_by_thousand = 1000
div_by_mil = 1000000
forcastrevpipe22[['Revenue_Forecast','Pipeline','Diff']] = forcastrevpipe22[['Revenue_Forecast','Pipeline','Diff']].div(div_by_mil, axis=0)
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', #hue='Relative_Month_Offset', style='Relative_Month_Offset',
             data=forcastrevpipe22.query('Relative_Month_Offset <= 0'), color="grey", ci=None, marker="o")
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', #hue='Relative_Month_Offset', style='Relative_Month_Offset',
             data=forcastrevpipe22.query('Relative_Month_Offset >= 0'), color="g", ci=None, linewidth=4, marker="o")
xs=forcastrevpipe22['End_of_Month']
ys=forcastrevpipe22['Revenue_Forecast']
for x,y in zip(xs,ys):

    label = "{:,.1f}".format(y)# + "K"

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='black',
                 ha='center') # horizontal alignment can be left, right or center

sns.lineplot(x= 'End_of_Month', y= 'Pipeline', #hue='Studio', style='Studio',
             data=forcastrevpipe22, color="b", ci=None, marker="o")
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
ax.legend(['Revenue','Forecast', 'Pipeline'], facecolor='w')
plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Revenue and Forecast by Months (in Millions)", fontsize=20)
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
values = forcastrevpipe22['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
forcastrevpipe21=forcastrevpipe22.copy()
#forcastrevpipe2['Revenue_Forecast'] = round(forcastrevpipe2['Revenue_Forecast']/div_by_mil,4)
#forcastrevpipe2['Diff'] = round(forcastrevpipe2['Diff']/div_by_mil,4)
g = sns.barplot(x="EOM1", y="Revenue_Forecast", ci=None,
                data=forcastrevpipe21,
                palette=clrs, #alpha=0.50,
                ax=ax
               )
#g.map(plt.axhline, y=today, ls='--', c='red')
plt.xticks(rotation=90)
#plt.axvline(x=today_eom, linewidth=2, color='r')
plt.title("Revenue & Forecast by Month (in Millions)",fontsize =18)
for p in g.patches:
    g.annotate("{:,.1f}".format(p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
#ax2 = ax.twinx()    
#sns.lineplot(x= 'EOM1', y= 'Diff', #hue='Studio',# style='Studio',
#             data=forcastrevpipe2, ci=None, 
#             color="r", marker="o", 
#            ax=ax2)   
#xs=forcastrevpipe2['EOM1']
#ys=forcastrevpipe2['Diff']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y*1000) + "K"

#    ax2.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
g.legend(['Revenue','Forecast'], facecolor='w')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
forcastrevpipe21=forcastrevpipe22.copy().query('EOM1>="2018-07-01"')
values = forcastrevpipe21['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
#forcastrevpipe2['Revenue_Forecast'] = round(forcastrevpipe2['Revenue_Forecast']/div_by_mil,3)
#forcastrevpipe2['Diff'] = round(forcastrevpipe2['Diff']/div_by_mil,3)
sns.lineplot(x= 'EOM1', y= 'Pipeline', #hue='Studio',# style='Studio',
             data=forcastrevpipe21, ci=None, 
             color="b", marker="o", 
            ax=ax) 
g = sns.barplot(x="EOM1", y="Revenue_Forecast", ci=None,
                data=forcastrevpipe21,
                palette=clrs, alpha=0.50,
                ax=ax
               )
#g.map(plt.axhline, y=today, ls='--', c='red')
plt.xticks(rotation=90)
#plt.axvline(x=today_eom, linewidth=2, color='r')
plt.title("Difference between Revenue_Forecast and Pipeline by Month (in Millions)",fontsize =18)
for p in g.patches:
    g.annotate("{:,.1f}".format(p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points', alpha=0.5)
ax2 = ax.twinx()    
sns.lineplot(x= 'EOM1', y= 'Diff', #hue='Studio',# style='Studio',
             data=forcastrevpipe21, ci=None, 
             color="r", marker="o", 
            ax=ax2)   
xs=forcastrevpipe21['EOM1']
ys=forcastrevpipe21['Diff']
for x,y in zip(xs,ys):

    label = "{:,.2f}".format(y) + "K"

    ax2.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
ax.legend(['Pipeline','Revenue'], facecolor='w')
#ax2.legend(['Diff'], facecolor='w')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
             data=forcastrevpipe_pivot1,
             palette="Accent", 
             ci=None, marker="o")
#sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
#             data=forcastrevpipe.query('Relative_Month_Offset >= 0'), 
#             #palette=palette, 
#             ci=None, linewidth=4, marker="o")
xs=forcastrevpipe_pivot1['End_of_Month']
ys=forcastrevpipe_pivot1['Revenue_Forecast']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y)# + "K"

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='black',
#                 ha='center') # horizontal alignment can be left, right or center

#sns.lineplot(x= 'End_of_Month', y= 'forecast', #hue='Studio', style='Studio',
#             data=combine_histforcast, color="r", ci=None)
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#ax.legend(['Revenue', 'Revenue forecast'], facecolor='w')
plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Revenue and Forecast by Studio, Months", fontsize=20)
plt.show()

In [None]:
values = forcastrevpipe_pivot1['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
g = sns.catplot(x="End_of_Month", y="Revenue_Forecast", col="Studio", 
                col_wrap=2, legend=True, margin_titles=True,
                data=forcastrevpipe_pivot1, kind="bar", ci=None,
                height=5, aspect=2.5, 
                palette=clrs
               ).set_xticklabels(rotation=-45)
#g.map(plt.axhline, y=today_eom, ls='--', c='red')
g.set_xticklabels(rotation=-45)
plt.subplots_adjust(top=0.95)
plt.suptitle('Revenue and Forecast',fontsize=24)
#axes = g.axes
#for p in axes.patches:
#    axes.annotate('{:.0f}'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
#               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()

In [None]:
#Combine with diff_mean2
diff_mean1 = pd.read_csv('diff_mean.csv')
#forcastrevpipe1['Revenue_Forecast'] = round(forcastrevpipe1['Revenue_Forecast']/1000,3)
diff_mean1['Relative_Month_Offset'] = pd.to_numeric(diff_mean1['Relative_Offset'].str[-3:])*-1
diff_mean2 = pd.merge(forcastrevpipe_pivot1, diff_mean1, how='left', on=['Relative_Month_Offset','Studio'])
diff_mean2['New_Forecast'] = (diff_mean2[['Revenue_Forecast','Pipeline']].max(axis=1))*(1+diff_mean2['Mean'])
diff_mean2 = diff_mean2.replace(np.nan, 0, regex=True)
#write to file
#diff_mean2.to_csv("diff_mean2.csv", index=False)
display("diff_mean2 info:")
show_stats(diff_mean2)
diff_mean2.query('Studio==@studio_debug')

In [None]:
values = diff_mean2['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
g = sns.catplot(x="Relative_Month_Offset", y="New_Forecast",# col="Studio", 
                #col_wrap=2, legend=True, margin_titles=True,
                data=diff_mean2, kind="bar", ci=None,
                height=10, aspect=2.5, 
                palette=clrs
               ).set_xticklabels(rotation=-45)
#g.map(plt.axhline, y=today_eom, ls='--', c='red')
g.set_xticklabels(rotation=-45)
plt.subplots_adjust(top=0.95)
plt.suptitle('Revenue and Forecast',fontsize=24)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.0f}'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()

In [None]:
Combine with input dataset
#df.columns = df.columns.astype(str).str.replace(" ", "_")
combine_df = pd.merge(df3, df, how='inner', on=['End_of_Month','Studio'])
combine_df2 = combine_df.copy()

combine_df2["Pipeline000-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline000-"])
combine_df2["Pipeline000-_Diff%"] = round(combine_df2["Pipeline000-_Diff"]/combine_df2["Pipeline000-"],4)
combine_df2["Pipeline001-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline001-"])
combine_df2["Pipeline001-_Diff%"] = round(combine_df2["Pipeline001-_Diff"]/combine_df2["Pipeline001-"],4)
combine_df2["Pipeline002-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline002-"])
combine_df2["Pipeline002-_Diff%"] = round(combine_df2["Pipeline002-_Diff"]/combine_df2["Pipeline002-"],4)
combine_df2["Pipeline003-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline003-"])
combine_df2["Pipeline003-_Diff%"] = round(combine_df2["Pipeline003-_Diff"]/combine_df2["Pipeline003-"],4)
combine_df2 = combine_df2.replace(np.nan, 0, regex=True)
#combine_df2 = combine_df2.sort_index(axis=1)
#write to file
combine_df2.to_csv("revforecastpredicted.csv", index=False)
display("combine_df2 info:")
show_stats(combine_df2)
display(combine_df2)

In [None]:
combine_df2.query('Relative_Month_Offset==0').pivot_table(index=['End_of_Month', 'Studio'],# columns=[],
                       values=["forecast", "Pipeline000-", "Pipeline000-_Diff", "Pipeline000-_Diff%",
                              "Pipeline001-", "Pipeline001-_Diff", "Pipeline001-_Diff%",
                              "Pipeline002-", "Pipeline002-_Diff", "Pipeline002-_Diff%",
                              "Pipeline003-", "Pipeline003-_Diff", "Pipeline003-_Diff%"
                              ],
                       aggfunc={"forecast": np.sum, "Pipeline000-": np.sum, "Pipeline000-_Diff": np.sum, "Pipeline000-_Diff%": np.mean,
                                "Pipeline001-": np.sum, "Pipeline001-_Diff": np.sum, "Pipeline001-_Diff%": np.mean,
                                "Pipeline002-": np.sum, "Pipeline002-_Diff": np.sum, "Pipeline002-_Diff%": np.mean,
                                "Pipeline003-": np.sum, "Pipeline003-_Diff": np.sum, "Pipeline003-_Diff%": np.mean
                               },
                       margins=True)#.reset_index()

In [None]:
#Set themes
import matplotlib.pyplot as plt
#plot
sns.set()
sns.set_context("talk") #In order of relative size they are: paper, notebook, talk, and poster. Notebook is default
sns.set(style="whitegrid") #Seaborn has five built-in themes to style its plots: darkgrid, whitegrid, dark, white, and ticks. 
# Save a palette to a variable:
palette = sns.color_palette("Accent",8)
# Use palplot and pass in the variable:
sns.palplot(palette)
# Set the palette using the name of a palette:
sns.set_palette(palette)

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline000-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline000-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline000-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline000-','Pipeline000-_Diff'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline000- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline001-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline001-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline001-_Diff'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline001- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline001-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline001-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline001-_Diff'], facecolor='w')
ax.set_title(label="Next Month Forecast, Pipeline001- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,10))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline002-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline002-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline002-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline002-','Pipeline002-_Diff'], facecolor='w')
ax.set_title(label="Next to next Month Forecast, Pipeline002- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
g=sns.catplot(x="Studio", y="Pipeline000-_Diff", hue="End_of_Month", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None,
                height=12, aspect=2, palette=palette).set_xticklabels(rotation=-45)
g.set(title="Pipeline Diff by Studio, Month")
axes = g.ax
#axes.set_ylim(0,1500000)

In [None]:
g = sns.catplot(x="End_of_Month", y="Pipeline000-_Diff", hue="Studio", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None, legend=True,
                height=12, aspect=2, palette=palette)
g.set_xticklabels(rotation=-45)
g.set(title="Pipeline Diff by Month, Studio")
axes = g.ax
#axes.set_ylim(0,150)

In [None]:
#plt.figure(figsize = (24,12))
sns.relplot(data=combine_df2, x="Pipeline000-", y="forecast", hue='Studio', #col="End_of_Month", col_wrap=3,
                  kind="line", palette="Set1", height=5, aspect=1)

In [None]:
sns.catplot(x="Studio", y="forecast", col="End_of_Month", 
                col_wrap=3, legend=True, margin_titles=True,
                data=combine_df2, kind="bar", ci=None,
                height=5, aspect=2.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="End_of_Month", y="forecast", col="Studio", 
                col_wrap=3, legend=True, margin_titles=True,
                data=combine_df2, kind="bar", ci=None,
                height=5, aspect=2.5, palette=palette).set_xticklabels(rotation=-45)
################################

In [None]:
#LONG TO WIDE
#combine_df2_grp = combine_df2.groupby(['Studio','End_of_Month'], as_index=True).agg({"forecast":"sum", "Pipeline":"sum", "Pipeline Diff":"sum", "Pipeline Diff%":"mean"}, margins=True).fillna(0)#.reset_index()
#display(combine_df2_grp)
#combine_df2_pivot = combine_df2.pivot_table(index='Studio', columns='End_of_Month', values=["forecast", "Pipeline000-", "Pipeline000- Diff", "Pipeline000- Diff%"], margins=False)
#combine_df2_pivot.columns.name=None
#combine_df2_pivot = combine_df2_pivot.reset_index()
#pd.set_option('display.float_format', lambda x: '%.1f' % x)
#print("\n combine_df2_pivot\n")
#combine_df2_pivot.to_csv("combine_df2_pivot.csv", index=False)
#combine_df2

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None, linewidth=4, alpha=0.7)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline002-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="orange", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline003-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="y", ci=None)
#ys=combine_df2_filter0['Pipeline001-_Diff']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline002-','Pipeline003-'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline001-, 02, 03 for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.pairplot(combine_df2_filter0, 
             x_vars=['Pipeline000-', 'Pipeline001-','Pipeline002-','Pipeline003-'], 
             y_vars=['forecast'],
             kind="reg", height=5, palette=palette)
fig=g.fig
#fig.xticklabels(rotation=-45)
fig.subplots_adjust(top=0.93, wspace=0.1)
fig.suptitle("Forecast vs current and previous months Pipeline " + combine_df2_filter0['End_of_Month'].astype(str).max(), size=14, fontweight='bold') 

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline000-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline000- Diff by Studio " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001- Diff by Studio " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001- Diff by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')


In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline000-_Diff%", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline000-_Diff% by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.1f}%'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff%", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001-_Diff% by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.1f}%'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
g=sns.catplot(x="Studio", y="forecast", hue="End_of_Month", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None,
                height=12, aspect=2, palette=palette).set_xticklabels(rotation=-45)
g.set(title="Forecast by Studio, Month")
axes = g.ax
#axes.set_ylim(0,1500000)

In [None]:
plt.figure(figsize=(20, 10))
g = sns.barplot(x="Studio", y="forecast", 
            data=df3, palette=palette)
plt.xticks(rotation=-45)
plt.title("Forecast by Studio for Current Month",fontsize =18)
#plt.xlabel ('SQUARE KM',fontsize =24)
for p in g.patches:
    g.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter2 = combine_df2.query('Relative_Month_Offset<=2')
sns.catplot(x="forecast", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="Pipeline000-", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="Pipeline000-_Diff", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
plt.figure(figsize=(20, 10))
g = sns.barplot(x="Studio", y="forecast", 
            data=df3.query('EOM1=="2020-05-31"', palette=palette)
plt.xticks(rotation=-45)
plt.title("Forecast by Studio for May 2020",fontsize =18)
#plt.xlabel ('SQUARE KM',fontsize =24)
for p in g.patches:
    g.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
g = sns.catplot(x="forecast", y="Studio", col="End_of_Month", col_wrap=3, 
            data=df3.query('EOM1=="2020-06-30"', kind="bar",
           height=10, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
g = sns.catplot(x="forecast", y="Studio", col="End_of_Month", col_wrap=3, 
            data=df3, kind="bar",
           height=10, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
g = sns.catplot(x="forecast", y="EOM1", col="Studio", col_wrap=3, 
            data=df3, kind="bar",
           height=10, aspect=1.5, palette=palette)

In [None]:
g=sns.catplot(data=df3_pivot, kind="box",
            height=8, aspect=2.5, palette=palette, orient="h")
#g.ax.set_xlim(-50000,1000000)

In [None]:
g3 = sns.barplot(data=combine_df2.query('Relative_Month_Offset ==0'), y='Studio', x='Pipeline000-_Diff', ci="sd", palette=palette)
g3

In [None]:
g = sns.FacetGrid(combine_df2.query('Relative_Month_Offset <=3'), hue="End_of_Month", col="Studio", palette="Set1", col_wrap=3, margin_titles=True, height=6, aspect=1)
g = g.map(plt.scatter, "Pipeline", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g

In [None]:
#g1 = sns.FacetGrid(combine_df2, hue="Studio", col="End_of_Month", palette="Set1", col_wrap=2, margin_titles=True, height=10, aspect=2)
#g1 = g1.map(plt.scatter, "2020-04-ActRec", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
#g1

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline000-", y="forecast", kind='reg',  height=12)
g2 = sns.jointplot(data=combine_df2, x="Pipeline000-", y="forecast", kind='resid',  height=12)

In [None]:
g3 = sns.FacetGrid(combine_df2, hue="End_of_Month", palette="Set1", margin_titles=True, height=8, aspect=2)
g3 = g3.map(plt.scatter, "Pipeline001-", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g3

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline001-", y="forecast", kind='reg',  height=8)
g2
g2 = sns.jointplot(data=combine_df2, x="Pipeline001-", y="forecast", kind='resid',  height=8)
g2


In [None]:
g3 = sns.FacetGrid(combine_df2, hue="End_of_Month", margin_titles=True, height=8, aspect=2)
g3 = g3.map(plt.scatter, "Pipeline002-", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g3

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline002-", y="forecast", kind='reg',  height=8)
g2
g2 = sns.jointplot(data=combine_df2, x="Pipeline002-", y="forecast", kind='resid',  height=8)
g2