In [88]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required!p
#!pip install seaborn
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import seaborn as sns

from pandas.io.json import json_normalize

In [89]:
#Definitions

today = pd.to_datetime('today').normalize()
current_eom = today + pd.offsets.MonthEnd(0)
start_date = "2017-01-01"
end_date = current_eom + pd.offsets.MonthEnd(11)
entity_debug = "CHN"
debug = True

def get_json(df):
    """ Small function to serialise DataFrame dates as 'YYYY-MM-DD' in JSON """

    def convert_timestamp(item_date_object):
        if isinstance(item_date_object, (datetime.date, datetime.datetime)):
            return item_date_object.strftime("%Y-%m-%d")
    
    dict_ = df.to_dict(orient="records")

    return json.dumps(dict_, default=convert_timestamp)

#display count and summary of any dataframe
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
pd.set_option('display.precision', 1)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

def get_df_name(df):
    name =[x for x in globals() if globals()[x] is df][0]
    return name

def difflist(li1, li2): 
    return (list(set(li1) - set(li2)))

def addlist(li1, li2): 
    return (li1.append(li2))

def remove_percetage(df, column_list):
    for col in column_list:
        df[col] = round(df[col].str.replace('%','').astype(np.float64) / 100, 4)
    return df

def coerce_df_columns_to_numeric(df):
    cols_float1 = list(df.filter(like='Rate', axis=1).columns)
    cols_float2 = list(df.filter(like='Yield', axis=1).columns)
    cols_float3 = list(df.filter(like='Diff%', axis=1).columns)
    cols_float4 = list(df.filter(like='Relative_Offset', axis=1).columns)
    cols_float5 = list(df.filter(like='sp500', axis=1).columns)
    cols_float6 = list(df.filter(like='Return', axis=1).columns)
    cols_float = cols_float1 + cols_float2 + cols_float3 + cols_float4 + cols_float5 + cols_float6
    #display("cols_float:", cols_float)
    cols_int1 = list(df.filter(like='Revenue', axis=1).columns)
    cols_int2 = list(df.filter(like='Conversions', axis=1).columns)
    cols_int3 = list(df.filter(like='Value', axis=1).columns)
    cols_int4 = list(df.filter(like='Pipeline', axis=1).columns)
    cols_int5 = list(df.filter(like='Offset', axis=1).columns)
    cols_int6 = list(df.filter(like='Headcount', axis=1).columns)
    cols_int = cols_int1 + cols_int2 + cols_int3 + cols_int4 + cols_int5 + cols_int6
    #display("cols_int:", cols_int)
    cols1 = list(df.select_dtypes(include='float64').columns)
    cols = cols1 + cols_int
    #display("cols:", cols)
    final_cols = difflist(cols,cols_float)
    #display("final_cols:", final_cols)
    df[final_cols] = df[final_cols].apply(pd.to_numeric, errors='coerce')
    df[final_cols] = df[final_cols].replace(np.nan, 0, regex=True)
    df[final_cols] = df[final_cols].astype(int)
    df[final_cols] = round(df[final_cols],0)
    #return df

def data_prep(df):
    df.columns = df.columns.astype(str).str.replace("_", " ")
#    if 'End_of_Month' in df.columns:
#        df['End_of_Month'] = pd.to_datetime(df['End_of_Month']) #Format Date
#    if 'Snapshot_Date_Short' in df.columns:
#        df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short']) #Format Date
#    df = df.query('Studio not in @exclude_studio')
#    df = df.replace(np.nan, 0, regex=True)
    return df

def show_stats(df):   
    print("\n DF Name: \n")
    display(get_df_name(df))
    print("\n DF Info: \n")
    display(df.info(verbose=True))
    print("\n DF Describe: \n")
    display(df.describe(include='all').transpose().head())
    print("\n DF Head: \n")
    display(df.head())
    print("\n DF Tail: \n")
    display(df.tail())
    group_by_studio = df.groupby(by=['Managing Studio Name'], as_index=False)
    studio_sum = group_by_studio.sum().reset_index(drop=True)
    studio_count = group_by_studio.count().reset_index(drop=True)
    print("\n Studio Sum: \n")
    display(studio_sum.head())
    print("\n Studio Count: \n")
    display(studio_count.head())
    return

In [90]:
# data prep
opp = pd.read_csv('OpportunityValueDuration.csv')
oppln = opp.replace(np.nan, 0, regex=True)

# add LN columns
#opp.rename(columns={"Forecast": "LnForecast"}, inplace = True)
oppln['Project Price_ Ln'] = np.log(oppln['Project Price_'])
oppln['Project Price Ln'] = np.log(oppln['Project Price'])
oppln['Current Project Duration Ln'] = np.log(oppln['Current Project Duration'])
oppln = oppln.replace(np.nan, 0, regex=True)
oppln.to_csv("OpportunityValueDurationLn.csv", index=False)
oppln[oppln.select_dtypes(include='float64').columns]

Unnamed: 0,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln,Current Project Duration Ln
0,1350000.00,195615.00,6.71,14.12,12.18,1.90
1,1196712.00,173523.24,3.71,14.00,12.06,1.31
2,300000.00,42240.00,1.71,12.61,10.65,0.54
3,260000.00,38532.00,1.71,12.47,10.56,0.54
4,350000.00,52115.00,0.71,12.77,10.86,-0.34
...,...,...,...,...,...,...
963,10000.00,10000.00,0.14,9.21,9.21,-1.97
964,150000.00,150000.00,49.00,11.92,11.92,3.89
965,50000.00,50000.00,1.71,10.82,10.82,0.54
966,32000.00,32000.00,1.71,10.37,10.37,0.54


In [91]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = 'db61fd47-db56-45e3-844f-1b1f5c47990a'
resource_group = 'BI-DevQA-RG'
workspace_name = 'dwmlazwu01'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='OppDurationPrediction')
dataset1 = Dataset.get_by_name(workspace, name='OppValueDuration')

df = dataset.to_pandas_dataframe()
#df['Close Date'] = pd.to_datetime(df['Close Date']).dt.date
#df.columns = df.columns.astype(str).str.replace("_", " ")
#df.rename(columns={"Current Monetary Value ": "Current Monetary Value_"}, inplace = True)
#df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short'])
df = df.replace(np.nan, 0, regex=True)
#df.columns.name=None
df = df.reset_index(drop = True)

#df['End_of_Month'] = pd.to_datetime(df['End_of_Month'])
print("df :")
display(df.head())
df_org = dataset1.to_pandas_dataframe()
#df1.columns = df1.columns.astype(str).str.replace("_", " ")
#df1.rename(columns={"Current Monetary Value ": "Current Monetary Value_"}, inplace = True)
#df['Snapshot_Date_Short'] = pd.to_datetime(df['Snapshot_Date_Short'])
#df_org['Close Date'] = pd.to_datetime(df_org['Close Date']).dt.date
df_org = df_org.replace(np.nan, 0, regex=True)
#df.columns.name=None
df_org = df_org.reset_index(drop = True)
display(df_org.head())

df :


Unnamed: 0,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Project Price_ Ln,Project Price Ln
0,CHN,CNY,1350000.0,195615.0,14.12,12.183903720759112
1,CHN,CNY,1196712.0,173523.24,14.0,12.064066817525603
2,CHN,CNY,300000.0,42240.0,12.61,10.651122918380144
3,CHN,CNY,260000.0,38532.0,12.47,10.559244343877516
4,CHN,CNY,350000.0,52115.0,12.77,10.861208094173422


Unnamed: 0,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln
0,CHN,CNY,1350000.0,195615.0,6.71,14.12,12.183903720759112
1,CHN,CNY,1196712.0,173523.24,3.71,14.0,12.064066817525603
2,CHN,CNY,300000.0,42240.0,1.71,12.61,10.651122918380144
3,CHN,CNY,260000.0,38532.0,1.71,12.47,10.559244343877516
4,CHN,CNY,350000.0,52115.0,0.71,12.77,10.861208094173422


In [92]:
#pivot
#df_pivot = df.pivot_table(index=['Currency Code'], columns='Managing Fin Entity ID', values=['Project Price Ln'], aggfunc=np.sum, margins=False)
#df3_pivot1['EOM1'] = pd.to_datetime(df3_pivot1['End_of_Month']).dt.date.astype(str)
#df_pivot.columns.name=None
#df_pivot = df_pivot.reset_index()
#df_pivot = df_pivot.reset_index(level=df_pivot.index.names)
#df_pivot

In [93]:
#Limit for forecast horizon
#revpipe = df.copy()
#revpipe.drop(columns=['Revenue'], axis=1, inplace = True)
#forecast_horizon = 6 #months
#today = datetime.date.today()
#today_plus_month_offset = today + pd.offsets.MonthEnd(6)
#lt_plus_month_offset = pd.to_datetime(df1['End of Month']) <= today_plus_month_offset
#forecast_horizon1 = revpipe['Relative_Month_Offset'] < forecast_horizon
#revpipe = revpipe.query('Relative_Month_Offset < @forecast_horizon')
df

Unnamed: 0,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Project Price_ Ln,Project Price Ln
0,CHN,CNY,1350000.00,195615.00,14.12,12.183903720759112
1,CHN,CNY,1196712.00,173523.24,14.00,12.064066817525603
2,CHN,CNY,300000.00,42240.00,12.61,10.651122918380143
3,CHN,CNY,260000.00,38532.00,12.47,10.559244343877515
4,CHN,CNY,350000.00,52115.00,12.77,10.861208094173422
...,...,...,...,...,...,...
963,USA,USD,10000.00,10000.00,9.21,9.210340371976184
964,USA,USD,150000.00,150000.00,11.92,11.918390573078392
965,USA,USD,50000.00,50000.00,10.82,10.819778284410283
966,USA,USD,32000.00,32000.00,10.37,10.373491181781864


In [94]:
import datetime
from dateutil.tz import tzutc

from pandas.io.json import json_normalize

In [95]:
#data = revenueforecastdf.to_json(orient="records")
# Convert to JSON string
import numpy as np

def get_json(df):
    """ Small function to serialise DataFrame dates as 'YYYY-MM-DD' in JSON """

    def convert_timestamp(item_date_object):
        if isinstance(item_date_object, (datetime.date, datetime.datetime)):
            return item_date_object.strftime("%Y-%m-%d")
    
    dict_ = df.to_dict(orient="records")

    return json.dumps(dict_, default=convert_timestamp)

#df1 = df.replace(np.nan, 0, regex=True)

input_data = get_json(df)
#data_loads = json.loads(input_data)
#data_loads['data']
print("Input Data: ",type(input_data))
input_data = "{\"data\" :" + input_data + "}"
input_data[0:2000]

Input Data:  <class 'str'>


'{"data" :[{"Managing Fin Entity ID": "CHN", "Currency Code": "CNY", "Project Price_": 1350000.0, "Project Price": 195615.0, "Project Price_ Ln": 14.115615150414612, "Project Price Ln": "12.183903720759112"}, {"Managing Fin Entity ID": "CHN", "Currency Code": "CNY", "Project Price_": 1196712.0, "Project Price": 173523.24, "Project Price_ Ln": 13.995088354087166, "Project Price Ln": "12.064066817525603"}, {"Managing Fin Entity ID": "CHN", "Currency Code": "CNY", "Project Price_": 300000.0, "Project Price": 42240.0, "Project Price_ Ln": 12.611537753638338, "Project Price Ln": "10.651122918380143"}, {"Managing Fin Entity ID": "CHN", "Currency Code": "CNY", "Project Price_": 260000.0, "Project Price": 38532.0, "Project Price_ Ln": 12.468436909997665, "Project Price Ln": "10.559244343877515"}, {"Managing Fin Entity ID": "CHN", "Currency Code": "CNY", "Project Price_": 350000.0, "Project Price": 52115.0, "Project Price_ Ln": 12.765688433465597, "Project Price Ln": "10.861208094173422"}, {"Ma

In [96]:
# URL for the web service
scoring_uri = 'http://eddd553b-8fed-498a-8cf9-b256679a067c.westus.azurecontainer.io/score'
# If the service is authenticated, set the key or token
key = 'YgBAKJ8T0pyeTXzlwvc4CFeXQorCbwPH'

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

In [97]:
# Make the request and display the response
#while True:
#    try:
resp = requests.post(scoring_uri, input_data, headers=headers)
json_data = resp.text
print("Json Data:", type(json_data))
json_data[0:2000]
#   except Exception as e:
#        result = str(e)
#        print(json.dumps({"error": result}))
#df_rest = pd.DataFrame.from_records(resp.json())
#df_rest

Json Data: <class 'str'>


'"{\\"result\\": [10.058883571992206, 9.872946416519023, 6.946904127405813, 6.9667194063253355, 7.88454088365571, 14.588751790914552, 13.321539364373379, 12.164996402755264, 11.662171535445786, 11.283002969078087, 13.199588233391282, 12.85043976332024, 12.362479839880638, 10.610016321544506, 12.001503588329289, 12.069360658246556, 20.5142263724039, 12.525016965686579, 11.567738623862269, 10.227022948928779, 5.878386550856732, 9.912056224792344, 12.501465378420866, 10.964473759658546, 4.8526889771002075, 17.089410628984457, 8.234583256088209, 6.012882149357053, 10.62341668488425, 15.514644494121582, 12.102382069935604, 11.398106315985427, 11.44507683110207, 11.356807426133077, 11.59220162739095, 10.200731884534527, 11.026680386578445, 16.13436793362564, 9.663163327264051, 9.625472035710565, 14.152218029597138, 5.312558984132812, 10.834787288585709, 18.96208115077433, 10.41676010783099, 6.923116759916064, 8.48102688904406, 10.444396439130383, 6.444656786654274, 11.488238815845289, 11.734

In [98]:
#Convert response to python dictionary
import datetime
from dateutil.tz import tzutc

r_json = resp.json()
r_json_loads = json.loads(r_json)
type(r_json_loads)
print("Data Dict Type=", type(r_json_loads))
print("Data Dict Keys=",r_json_loads.keys())
data_dict = r_json_loads
#data_dict['forecast']
#for i in data_dict:
#    if isinstance(i, dict):
#        for key, value in i.items():
#            print(key, value)
#    else:
#        print(i)
        
#for key, value in data_dict.items():
#        print(key, ":", value)

Data Dict Type= <class 'dict'>
Data Dict Keys= dict_keys(['result'])


In [99]:
#Convert response to python dictionary
#import datetime
#from dateutil.tz import tzutc

#loaded_json = json.loads(json_data)

#print("Loaded Json=",type(loaded_json))
#norm_json = json_normalize(loaded_json)
#print(norm_json)

#print(json.dumps(loaded_json, indent=4, sort_keys=True))

#data_dict = json.loads(loaded_json)
#print("Data Dict Type=", type(data_dict))
#print("Data Dict Keys=",data_dict.keys())
#data_dict['forecast'][0]
#json_normalize(data_dict)
#json_normalize(data_dict, record_path='index')
#df1 = pd.DataFrame.from_dict(json_normalize(data_dict))
#df1['index']=[df1['index']]
#json_normalize(data_dict['index'])



In [100]:
print("Convert Nested Data Dictornary to flattened Dataframe and datetime to date format")
####Convert Nested Data Dictornary to flattened Dataframe
df1 = pd.DataFrame(data_dict)
df2 = pd.DataFrame()
for col in reversed(df1.columns):
    display(col)
    norm = json_normalize(data_dict, record_path=col).add_prefix(f'{col}.')
    df2= pd.concat([df2, norm], axis=1, sort=False)
    df2.columns = df2.columns.astype(str).str.replace("result.", "")
    df2.columns = df2.columns.astype(str).str.replace(".0", "")
    #df2.columns = df2.columns.astype(str).str.replace(" ", "_")

df2['Forecast'] = round(df2['0'],2)
df2.drop(columns=['0'], axis=1, inplace = True)
#show_stats(df2)
df2

Convert Nested Data Dictornary to flattened Dataframe and datetime to date format


'result'

Unnamed: 0,Forecast
0,10.06
1,9.87
2,6.95
3,6.97
4,7.88
...,...
963,4.75
964,9.38
965,5.69
966,7.00


In [105]:
#Concatenate Forecast RESULTS with original dataframe
opp_duration_predict1 = pd.concat([oppln, df2], axis=1, sort=False)
#opp_duration_predict1['Close Date'] = pd.to_datetime(opp_duration_predict1['Close Date']).dt.date
#forecast_final['Diff'] = forecast_final['Forecast'] - forecast_final['Revenue']
#forecast_final['Diff%'] = round(forecast_final['Diff'] / forecast_final['Revenue'],4)
opp_duration_predict1 = opp_duration_predict1.reset_index(drop=True)
#coerce_df_columns_to_numeric(forecast_final)
print("opp_duration_predict1 info:")
#show_stats(opp_duration_predict1)
#opp_duration_predict1.to_csv("opp_duration_predict1.csv", index=False)
opp_duration_predict1[opp_duration_predict1['Managing Fin Entity ID']==entity_debug]
#opp_duration_predict_final.query("'Managing Studio Name'==@entity_debug")

opp_duration_predict1 info:


Unnamed: 0,Project Title - ID,Opportunity Name - ID,Close Date,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln,Current Project Duration Ln,Forecast
0,[After Party] Intercorp Executive China Immersion Wave 2 - 26964505,Intercorp Executive Immersion Wave 2 - 17441078,2019-07-26 00:00:00,CHN,CNY,1350000.0,195615.0,6.71,14.12,12.18,1.9,10.06
1,[Block Party] Intercorp China Immersion 3/2019 - 24403465,Intercorp China Immersion - 15002226,2019-01-31 00:00:00,CHN,CNY,1196712.0,173523.24,3.71,14.0,12.06,1.31,9.87
2,[CLA Workshop] Midea Workshop 6/19-6/20 - 31499635,[CLA] Midea Workshop 6/19-6/20 - 20972369,2020-06-09 00:00:00,CHN,CNY,300000.0,42240.0,1.71,12.61,10.65,0.54,6.95
3,[CLA Workshop] Tencent University Workshop 4/11-4/12 - 25043125,Tencent University Workshop 4/11-4/12 - 16085782,2019-04-19 00:00:00,CHN,CNY,260000.0,38532.0,1.71,12.47,10.56,0.54,6.97
4,[CLA Workshop] Vanke Wanyi Workshop 4/13-4/14 - 25339615,Vanke Wanyi Workshop 4/13-4/14 - 16348407,2019-04-19 00:00:00,CHN,CNY,350000.0,52115.0,0.71,12.77,10.86,-0.34,7.88
5,[Cong Buddy] Shinho CBL Repositioning - 27281385,Shinho CBL Repositioning - 18055703,2019-09-04 00:00:00,CHN,CNY,5125000.0,727237.5,22.43,15.45,13.5,3.11,14.59
6,[Crimson] Phase 2 - Ford + JMC CV China Strategy - 29802495,Ford + JMC CV China Strategy (Crimson) Phase 2 Pre-Pause - 18415801,2020-03-26 00:00:00,CHN,CNY,4025000.0,581210.0,19.43,15.21,13.27,2.97,13.32
7,[D'Lux] Ford China Luxury Festival - 25215295,Luxury Festival - 16157324,2019-05-09 00:00:00,CHN,CNY,3323182.0,494821.8,14.71,15.02,13.11,2.69,12.16
8,[Flip] CR Suhe - 23691015,CR Suhe - 14476033,2019-01-23 00:00:00,CHN,CNY,3150000.0,453915.0,9.29,14.96,13.03,2.23,11.66
9,[GPGS] Vanke Looking In FTE Program - 23417325,Vanke HQ Good Product Good Service FTE Program - 14684010,2019-01-11 00:00:00,CHN,CNY,1839623.0,266009.49,11.14,14.43,12.49,2.41,11.28


In [107]:
opp_duration_predict1

Unnamed: 0,Project Title - ID,Opportunity Name - ID,Close Date,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln,Current Project Duration Ln,Forecast
0,[After Party] Intercorp Executive China Immersion Wave 2 - 26964505,Intercorp Executive Immersion Wave 2 - 17441078,2019-07-26 00:00:00,CHN,CNY,1350000.00,195615.00,6.71,14.12,12.18,1.90,10.06
1,[Block Party] Intercorp China Immersion 3/2019 - 24403465,Intercorp China Immersion - 15002226,2019-01-31 00:00:00,CHN,CNY,1196712.00,173523.24,3.71,14.00,12.06,1.31,9.87
2,[CLA Workshop] Midea Workshop 6/19-6/20 - 31499635,[CLA] Midea Workshop 6/19-6/20 - 20972369,2020-06-09 00:00:00,CHN,CNY,300000.00,42240.00,1.71,12.61,10.65,0.54,6.95
3,[CLA Workshop] Tencent University Workshop 4/11-4/12 - 25043125,Tencent University Workshop 4/11-4/12 - 16085782,2019-04-19 00:00:00,CHN,CNY,260000.00,38532.00,1.71,12.47,10.56,0.54,6.97
4,[CLA Workshop] Vanke Wanyi Workshop 4/13-4/14 - 25339615,Vanke Wanyi Workshop 4/13-4/14 - 16348407,2019-04-19 00:00:00,CHN,CNY,350000.00,52115.00,0.71,12.77,10.86,-0.34,7.88
...,...,...,...,...,...,...,...,...,...,...,...,...
963,Yamaha Innovation Workshop - 24771575,Yamaha Innovation Workshop: Business Development - 15823290,2019-03-01 00:00:00,USA,USD,10000.00,10000.00,0.14,9.21,9.21,-1.97,4.75
964,YNAP_CoLab 2019_Portfolio CE - 24316795,YNAP_CoLab 2019_Portfolio CE - 14997398,2019-01-01 00:00:00,USA,USD,150000.00,150000.00,49.00,11.92,11.92,3.89,9.38
965,Zendesk - Call Centers 2 Week Sprint - 25745575,Zendesk - Call Centers 2 Week Sprint - 15876282,2019-05-21 00:00:00,USA,USD,50000.00,50000.00,1.71,10.82,10.82,0.54,5.69
966,Zurich Insurance Tour/Workshop - 31728375,Zurich Insurance Tour/Workshop - 19706884,2020-07-10 00:00:00,USA,USD,32000.00,32000.00,1.71,10.37,10.37,0.54,7.00


In [109]:
## Merge with actual project duration df1
opp_duration_predict_final = opp_duration_predict1
#opp_duration_predict_final1 = pd.concat([oppln, df2], axis=1, sort=False)
#opp_duration_predict_final1 = pd.merge(oppln, opp_duration_predict1, how='right', on=['Managing Fin Entity ID','Project Price Ln'])
#cols = list(pipetalent1.select_dtypes(include='float64').columns)
#coerce_df_columns_to_numeric(pipetalent1, cols)
#pipetalent1 = pipetalent1.query('End_of_Month >= @start_date')
# Replace NAN with 0 and sort by End_of_Month, Studio
#opp_duration_predict_final = opp_duration_predict_final1.replace(np.nan, 0, regex=True)
#opp_duration_predict_final# = opp_duration_predict_final.sort_values(by=['End_of_Month','Studio'])
# Convert LN to EXP
#opp_duration_predict_final.rename(columns={"Forecast": "Forecast Ln"}, inplace = True)
#opp_duration_predict_final['Project Price'] = np.round(np.exp(opp_duration_predict_final['Project Price Ln']),0)
#opp_duration_predict_final['Current Project Duration'] = np.round(np.exp(opp_duration_predict_final['Current Project Duration Ln']),2)
#opp_duration_predict_final['Forecast'] = np.round(np.exp(opp_duration_predict_final['Forecast Ln']),2)
opp_duration_predict_final.to_csv("opp_duration_predict_final.csv", index=False)
opp_duration_predict_final[opp_duration_predict_final['Managing Fin Entity ID']==entity_debug]
#opp_duration_predict_final.columns

Unnamed: 0,Project Title - ID,Opportunity Name - ID,Close Date,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln,Current Project Duration Ln,Forecast
0,[After Party] Intercorp Executive China Immersion Wave 2 - 26964505,Intercorp Executive Immersion Wave 2 - 17441078,2019-07-26 00:00:00,CHN,CNY,1350000.0,195615.0,6.71,14.12,12.18,1.9,10.06
1,[Block Party] Intercorp China Immersion 3/2019 - 24403465,Intercorp China Immersion - 15002226,2019-01-31 00:00:00,CHN,CNY,1196712.0,173523.24,3.71,14.0,12.06,1.31,9.87
2,[CLA Workshop] Midea Workshop 6/19-6/20 - 31499635,[CLA] Midea Workshop 6/19-6/20 - 20972369,2020-06-09 00:00:00,CHN,CNY,300000.0,42240.0,1.71,12.61,10.65,0.54,6.95
3,[CLA Workshop] Tencent University Workshop 4/11-4/12 - 25043125,Tencent University Workshop 4/11-4/12 - 16085782,2019-04-19 00:00:00,CHN,CNY,260000.0,38532.0,1.71,12.47,10.56,0.54,6.97
4,[CLA Workshop] Vanke Wanyi Workshop 4/13-4/14 - 25339615,Vanke Wanyi Workshop 4/13-4/14 - 16348407,2019-04-19 00:00:00,CHN,CNY,350000.0,52115.0,0.71,12.77,10.86,-0.34,7.88
5,[Cong Buddy] Shinho CBL Repositioning - 27281385,Shinho CBL Repositioning - 18055703,2019-09-04 00:00:00,CHN,CNY,5125000.0,727237.5,22.43,15.45,13.5,3.11,14.59
6,[Crimson] Phase 2 - Ford + JMC CV China Strategy - 29802495,Ford + JMC CV China Strategy (Crimson) Phase 2 Pre-Pause - 18415801,2020-03-26 00:00:00,CHN,CNY,4025000.0,581210.0,19.43,15.21,13.27,2.97,13.32
7,[D'Lux] Ford China Luxury Festival - 25215295,Luxury Festival - 16157324,2019-05-09 00:00:00,CHN,CNY,3323182.0,494821.8,14.71,15.02,13.11,2.69,12.16
8,[Flip] CR Suhe - 23691015,CR Suhe - 14476033,2019-01-23 00:00:00,CHN,CNY,3150000.0,453915.0,9.29,14.96,13.03,2.23,11.66
9,[GPGS] Vanke Looking In FTE Program - 23417325,Vanke HQ Good Product Good Service FTE Program - 14684010,2019-01-11 00:00:00,CHN,CNY,1839623.0,266009.49,11.14,14.43,12.49,2.41,11.28


In [110]:
opp_duration_predict_final[opp_duration_predict_final['Project Price Ln']<=0]

Unnamed: 0,Project Title - ID,Opportunity Name - ID,Close Date,Managing Fin Entity ID,Currency Code,Project Price_,Project Price,Current Project Duration,Project Price_ Ln,Project Price Ln,Current Project Duration Ln,Forecast
32,Craft in Digital: Lincoln DLUX Follow-On - 33484455,Craft in Digital: Lincoln DLUX Follow-On - 21859837,2020-10-30 00:00:00,CHN,CNY,3114810.0,0.0,9.71,14.95,-inf,2.27,11.45
33,Crimson Phase 3 - 32629195,Crimson Phase 3 - 21317269,2020-09-14 00:00:00,CHN,CNY,3040000.0,0.0,13.71,14.93,-inf,2.62,11.36
36,Crimson Phase 3 Extension - 33184385,Crimson Phase 3 Extension - 21627149,2020-10-18 00:00:00,CHN,CNY,1985300.0,0.0,4.71,14.5,-inf,1.55,11.03
64,Capsule - 31505725,Capsule - 20933471,2020-10-05 00:00:00,DEU,EUR,50000.0,0.0,1.71,10.82,-inf,0.54,7.78
146,Shiseido- Holistic Beauty - 33919875,Shiseido Co. Ltd - Holistic beauty service for 2023 - 22286049,2020-12-20 00:00:00,JPN,JPY,45000000.0,0.0,7.86,17.62,-inf,2.06,10.26
174,Aetna - Medicare Supp UX - 32414375,Aetna - Medicare Supp UX - 21632307,2020-10-08 00:00:00,USA,USD,100000.0,0.0,2.71,11.51,-inf,1.0,9.04
332,Cornell SCJ Brand Expression - 29773885,Cornell SCJ Brand Expression - 19497668,2020-11-02 00:00:00,USA,USD,395000.0,0.0,6.71,12.89,-inf,1.9,11.95
372,Diasorin Phase 1A - 33849875,Diasorin Phase 1A - 21853305,2020-10-19 00:00:00,USA,USD,595000.0,0.0,3.71,13.3,-inf,1.31,12.93
439,Ford - D-Ford London 2020: Project Pivot + PV Support + Environments - 33773835,Ford - D-Ford London 2020: Project Pivot + PV Support + Environments - 22129615,2020-10-12 00:00:00,USA,USD,1300000.0,0.0,18.71,14.08,-inf,2.93,13.55
440,Ford - D-Ford London 2020: Q3 &Q4 - 30397555,Ford - D-Ford London 2020 - 18015468,2020-03-02 00:00:00,USA,USD,3184742.0,0.0,35.71,14.97,-inf,3.58,15.78


In [65]:
****************************************************************************************************
****************************************************************************************************
STOP
****************************************************************************************************
****************************************************************************************************

SyntaxError: invalid syntax (<ipython-input-65-ed0f7b4f0176>, line 1)

In [None]:
#pivot by Studio, End_of_Month
forecast_final_pivot = forecast_final.pivot_table(index=['Studio','Snapshot_Date_Short'], columns='Relative_Offset', values='Forecast', aggfunc=np.sum, margins=True)
forecast_final_pivot = forecast_final_pivot.reset_index(level=forecast_final_pivot.index.names)
print("forecast_final_pivot info:")
display(forecast_final_pivot)
#revpipe1_pivot.to_csv("revpipe1_pivot.csv", index=False)

In [None]:
#pivot by EOM1, Studio
forecast_final_pivot1 = forecast_final.pivot_table(index=['Snapshot_Date_Short','Relative_Offset'], columns='Studio', values='Forecast', aggfunc=np.sum, margins=True)
forecast_final_pivot1 = forecast_final_pivot1.reset_index(level=forecast_final_pivot1.index.names)
forecast_final_pivot1Pr

In [None]:

#Add Revenue History Column
#revpipe_final = pd.read_csv("revpipe_final.csv")
#revpipe_final1 = revpipe_final[['Snapshot_Date_Short','End_of_Month','Relative_Offset','Studio','Revenue']]
#revpipe_final1
#revpipe_forecast1 = pd.merge(revpipe_forecast, revpipe_final, how='left', on=['Snapshot_Date_Short','End_of_Month','Relative_Month_Offset','Studio'])

In [None]:
**************************************************************
#Append History Revenue with Forecast
revexphist2 = pd.read_csv("revexphist1.csv")
append_revforcast = revexphist2.append(df2, ignore_index=True, sort=True)
#pd.merge(combine_df2, revhistorydiff, how='inner', on=['End_of_Month','Studio'])
append_revforcast['EOM1'] = pd.to_datetime(append_revforcast['End_of_Month']).dt.date.astype(str)
append_revforcast['End_of_Month'] = pd.to_datetime(append_revforcast['End_of_Month'])
#append_revforcast['Relative_Month_Offset'] = append_revforcast['End_of_Month'].dt.to_period('M') - pd.to_datetime('today').to_period('M')
append_revforcast['Relative_Month_Offset'] = round((append_revforcast['End_of_Month'] - current_eom)/np.timedelta64(1,'M'),0)
cols = append_revforcast.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(append_revforcast, cols)
append_revforcast = append_revforcast.replace(np.nan, 0, regex=True)
cols = list(append_revforcast.select_dtypes(include='float64').columns)
cols_float1 = append_revforcast.filter(like='Rate', axis=1).columns
cols_float2 = append_revforcast.filter(like='Yield', axis=1).columns
cols_float3 = append_revforcast.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
coerce_df_columns_to_numeric(append_revforcast, cols)
#cols = ['Revenue','forecast','origin','Relative_Month_Offset']
#coerce_df_columns_to_numeric(append_revforcast, cols)
append_revforcast['Revenue_Forecast'] = append_revforcast['Revenue'] + append_revforcast['forecast']
append_revforcast = append_revforcast.sort_values(by=['End_of_Month','Studio']).reset_index(drop=True)
append_revforcast =append_revforcast.replace(np.nan, 0, regex=True)
append_revforcast = append_revforcast.pivot_table(index=['End_of_Month','Relative_Month_Offset','Studio'],values=['Revenue','forecast','Revenue_Forecast'],aggfunc=sum)
append_revforcast = append_revforcast.reset_index(level=append_revforcast.index.names)
#write to file
#append_revforcast.to_csv("append_revforcast.csv", index=False)
display("append_revforcast info:")
show_stats(append_revforcast)
display(append_revforcast.query('Studio==@entity_debug'))

In [None]:
#Merge with Pipeline when month and snapshot month are same
pipehist21 = pd.read_csv("pipetalentrev_final.csv")
pipehist21['End_of_Month'] = pd.to_datetime(pipehist21['End_of_Month'])
cols = list(pipehist21.select_dtypes(include='float64').columns)
cols_float1 = pipehist21.filter(like='Rate', axis=1).columns
cols_float2 = pipehist21.filter(like='Yield', axis=1).columns
cols_float3 = pipehist21.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
#display(pipehist21.info())
coerce_df_columns_to_numeric(pipehist21, cols)
#Merge revenue forecast with pipehistory
forcastrevpipe = pd.merge(pipehist21, append_revforcast, how='left', on=['End_of_Month','Relative_Month_Offset','Studio','Revenue'])
forcastrevpipe = forcastrevpipe.replace(np.nan, 0, regex=True)
forcastrevpipe['End_of_Month'] = pd.to_datetime(forcastrevpipe['End_of_Month'])
forcastrevpipe['EOM1'] = pd.to_datetime(forcastrevpipe['End_of_Month']).dt.date.astype(str)
forcastrevpipe['Relative_Month_Offset'] = round((forcastrevpipe['End_of_Month'] - current_eom)/np.timedelta64(1,'M'),0)
cols = forcastrevpipe.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(forcastrevpipe, cols)
#show_stats(forcastrevpipe)
forcastrevpipe['Revenue_Forecast'] = forcastrevpipe['Revenue'] + forcastrevpipe['forecast']
forcastrevpipe['Diff'] = forcastrevpipe['Revenue_Forecast'] - forcastrevpipe['Pipeline']
forcastrevpipe['Diff%'] = round(forcastrevpipe['Diff'] / forcastrevpipe['Pipeline'],4)
cols = list(forcastrevpipe.select_dtypes(include='float64').columns)
cols_float1 = forcastrevpipe.filter(like='Rate', axis=1).columns
cols_float2 = forcastrevpipe.filter(like='Yield', axis=1).columns
cols_float3 = forcastrevpipe.filter(like='Diff%', axis=1).columns
cols_float = addlist(cols_float1,cols_float2)
cols_float = addlist(cols_float,cols_float3)
coerce_df_columns_to_numeric(forcastrevpipe, cols)
forcastrevpipe.replace([np.inf, -np.inf], np.nan, inplace=True)
forcastrevpipe = forcastrevpipe.replace(np.nan, 0, regex=True)
display("forcastrevpipe info:")
show_stats(forcastrevpipe)
forcastrevpipe.to_csv("forcastrevpipe.csv")
display(forcastrevpipe.query('Studio==@entity_debug'))

In [None]:
#pipehist21.drop(columns=['Snapshot_Date_Short','Relative_Snapshot_Month_Offset'], axis=1, inplace = True)
#pipe_offset = ['Pipeline-00','Pipeline-01','Pipeline-02','Pipeline-03',
#               'Pipeline-04','Pipeline-05','Pipeline-06','Pipeline-07',
#               'Pipeline-08','Pipeline-09','Pipeline-10','Pipeline-11'
#              ]
#forcastrevpipe1 = forcastrevpipe.query('(Relative_Offset in  @pipe_offset) or (Relative_Snapshot_Month_Offset == 0 and Relative_Month_Offset > 0)').reset_index(drop=True)
forcastrevpipe1 = forcastrevpipe.query('EOM1 >= "2018-07-01"')
forcastrevpipe_pivot1 = forcastrevpipe1.pivot_table(index=['End_of_Month','Relative_Month_Offset','EOM1','Studio'],# columns=[],
                       values=['Revenue','Revenue_Forecast','Pipeline','Diff','Diff%'],
                       aggfunc={'Revenue':np.sum,'Revenue_Forecast':np.sum,'Pipeline':np.sum,'Diff':np.sum,'Diff%':np.mean},
                       margins=False)#.reset_index()

forcastrevpipe_pivot1 = forcastrevpipe_pivot1.reset_index(level=forcastrevpipe_pivot1.index.names)
forcastrevpipe_pivot1 = forcastrevpipe_pivot1.query('Relative_Month_Offset < @forecast_horizon')
cols = forcastrevpipe_pivot1.filter(like='Relative_Month_Offset', axis=1).columns
coerce_df_columns_to_numeric(forcastrevpipe_pivot1, cols)
#cols = ['Revenue','forecast','origin','Relative_Month_Offset','Revenue_Forecast','Pipeline','Diff']
forcastrevpipe_pivot1 = forcastrevpipe_pivot1.sort_values(by=['End_of_Month','Studio']).reset_index(drop=True)
#forcastrevpipe['Snapshot_Date_Short'] = pd.to_datetime(forcastrevpipe['Snapshot_Date_Short'])
print("\n forcastrevpipe_pivot1: \n")
#display(forcastrevpipe1.info())
#write to file
forcastrevpipe.to_csv("forcastrevpipe_pivot1.csv", index=False)
display("forcastrevpipe_pivot1 info:")
show_stats(forcastrevpipe_pivot1)
display(forcastrevpipe_pivot1.query('Studio==@entity_debug'))

In [None]:
# Analyze by end of month pipeline - revenue Diff%
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)
sns.lineplot(x= 'Relative_Month_Offset', y= 'Diff%', hue='Studio', #style='Studio',
             data=forcastrevpipe_pivot1,
             palette="Accent", 
             ci=None, marker="o")
#sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
#             data=forcastrevpipe.query('Relative_Month_Offset >= 0'), 
#             #palette=palette, 
#             ci=None, linewidth=4, marker="o")
#xs=forcastrevpipe['Relative_Month_Offset']
#ys=forcastrevpipe['Diff%']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y)# + "K"

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='black',
#                 ha='center') # horizontal alignment can be left, right or center

#sns.lineplot(x= 'End_of_Month', y= 'forecast', #hue='Studio', style='Studio',
#             data=combine_histforcast, color="r", ci=None)
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#ax.legend(['Revenue', 'Revenue forecast'], facecolor='w')
#plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Distribution of Diff% between Revenue - Forecast by Offset, Months", fontsize=20)
plt.show()

In [None]:

forcastrevpipe_pivot2 = forcastrevpipe_pivot1.copy()
forcastrevpipe_pivot2 =forcastrevpipe_pivot2.replace(np.nan, 0, regex=True)
#forcastrevpipe_pivot2 = forcastrevpipe_pivot2.reset_index(level=forcastrevpipe_pivot2.index.names)
forcastrevpipe_pivot2.info()
forcastrevpipe_pivot2

In [None]:
forcastrevpipe_pivot21 = forcastrevpipe_pivot1.query('Relative_Month_Offset >=0').pivot_table(index=['Studio','Relative_Month_Offset'], columns=['EOM1'],
                       values=['Revenue', 'Revenue_Forecast','Pipeline','Diff','Diff%'],
                       aggfunc={'Revenue':np.sum,'Revenue_Forecast':np.sum,'Pipeline':np.sum,'Diff':np.sum,'Diff%':np.mean},
                       margins=False)#.reset_index()
#forcastrevpipe1['Revenue_Forecast'] = round(forcastrevpipe1['Revenue_Forecast']/1000,3)
forcastrevpipe_pivot21 = forcastrevpipe_pivot21.replace(np.nan, 0, regex=True)
forcastrevpipe_pivot21 = forcastrevpipe_pivot21.reset_index(level=forcastrevpipe_pivot21.index.names)
forcastrevpipe_pivot21.info()
forcastrevpipe_pivot21.head()

In [None]:
# multiple line plot
#Set themes
import matplotlib.pyplot as plt
#plot
sns.set()
sns.set_context("poster") #In order of relative size they are: paper, notebook, talk, and poster. Notebook is default
sns.set(style="whitegrid") #Seaborn has five built-in themes to style its plots: darkgrid, whitegrid, dark, white, and ticks. 
# Save a palette to a variable:
palette = sns.color_palette("Accent",8)
# Use palplot and pass in the variable:
#sns.palplot(palette)
# Set the palette using the name of a palette:
#sns.set_palette(palette)

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
plt.xticks(rotation=-45)
#today_eom = today + pd.offsets.MonthEnd(0) 
forcastrevpipe22 = forcastrevpipe_pivot1.groupby(by=['End_of_Month','Relative_Month_Offset','EOM1'], as_index=False)
forcastrevpipe22 = forcastrevpipe22.sum().reset_index(drop=True)
#forcastrevpipe22 = forcastrevpipe22.query('EOM1 >= "2018-07-01"')
display(forcastrevpipe22)
div_by_thousand = 1000
div_by_mil = 1000000
forcastrevpipe22[['Revenue_Forecast','Pipeline','Diff']] = forcastrevpipe22[['Revenue_Forecast','Pipeline','Diff']].div(div_by_mil, axis=0)
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', #hue='Relative_Month_Offset', style='Relative_Month_Offset',
             data=forcastrevpipe22.query('Relative_Month_Offset <= 0'), color="grey", ci=None, marker="o")
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', #hue='Relative_Month_Offset', style='Relative_Month_Offset',
             data=forcastrevpipe22.query('Relative_Month_Offset >= 0'), color="g", ci=None, linewidth=4, marker="o")
xs=forcastrevpipe22['End_of_Month']
ys=forcastrevpipe22['Revenue_Forecast']
for x,y in zip(xs,ys):

    label = "{:,.1f}".format(y)# + "K"

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='black',
                 ha='center') # horizontal alignment can be left, right or center

sns.lineplot(x= 'End_of_Month', y= 'Pipeline', #hue='Studio', style='Studio',
             data=forcastrevpipe22, color="b", ci=None, marker="o")
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
ax.legend(['Revenue','Forecast', 'Pipeline'], facecolor='w')
plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Revenue and Forecast by Months (in Millions)", fontsize=20)
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
values = forcastrevpipe22['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
forcastrevpipe21=forcastrevpipe22.copy()
#forcastrevpipe2['Revenue_Forecast'] = round(forcastrevpipe2['Revenue_Forecast']/div_by_mil,4)
#forcastrevpipe2['Diff'] = round(forcastrevpipe2['Diff']/div_by_mil,4)
g = sns.barplot(x="EOM1", y="Revenue_Forecast", ci=None,
                data=forcastrevpipe21,
                palette=clrs, #alpha=0.50,
                ax=ax
               )
#g.map(plt.axhline, y=today, ls='--', c='red')
plt.xticks(rotation=90)
#plt.axvline(x=today_eom, linewidth=2, color='r')
plt.title("Revenue & Forecast by Month (in Millions)",fontsize =18)
for p in g.patches:
    g.annotate("{:,.1f}".format(p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
#ax2 = ax.twinx()    
#sns.lineplot(x= 'EOM1', y= 'Diff', #hue='Studio',# style='Studio',
#             data=forcastrevpipe2, ci=None, 
#             color="r", marker="o", 
#            ax=ax2)   
#xs=forcastrevpipe2['EOM1']
#ys=forcastrevpipe2['Diff']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y*1000) + "K"

#    ax2.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
g.legend(['Revenue','Forecast'], facecolor='w')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(24,9))
forcastrevpipe21=forcastrevpipe22.copy().query('EOM1>="2018-07-01"')
values = forcastrevpipe21['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
#forcastrevpipe2['Revenue_Forecast'] = round(forcastrevpipe2['Revenue_Forecast']/div_by_mil,3)
#forcastrevpipe2['Diff'] = round(forcastrevpipe2['Diff']/div_by_mil,3)
sns.lineplot(x= 'EOM1', y= 'Pipeline', #hue='Studio',# style='Studio',
             data=forcastrevpipe21, ci=None, 
             color="b", marker="o", 
            ax=ax) 
g = sns.barplot(x="EOM1", y="Revenue_Forecast", ci=None,
                data=forcastrevpipe21,
                palette=clrs, alpha=0.50,
                ax=ax
               )
#g.map(plt.axhline, y=today, ls='--', c='red')
plt.xticks(rotation=90)
#plt.axvline(x=today_eom, linewidth=2, color='r')
plt.title("Difference between Revenue_Forecast and Pipeline by Month (in Millions)",fontsize =18)
for p in g.patches:
    g.annotate("{:,.1f}".format(p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points', alpha=0.5)
ax2 = ax.twinx()    
sns.lineplot(x= 'EOM1', y= 'Diff', #hue='Studio',# style='Studio',
             data=forcastrevpipe21, ci=None, 
             color="r", marker="o", 
            ax=ax2)   
xs=forcastrevpipe21['EOM1']
ys=forcastrevpipe21['Diff']
for x,y in zip(xs,ys):

    label = "{:,.2f}".format(y) + "K"

    ax2.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
ax.legend(['Pipeline','Revenue'], facecolor='w')
#ax2.legend(['Diff'], facecolor='w')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)
sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
             data=forcastrevpipe_pivot1,
             palette="Accent", 
             ci=None, marker="o")
#sns.lineplot(x= 'End_of_Month', y= 'Revenue_Forecast', hue='Studio', #style='Studio',
#             data=forcastrevpipe.query('Relative_Month_Offset >= 0'), 
#             #palette=palette, 
#             ci=None, linewidth=4, marker="o")
xs=forcastrevpipe_pivot1['End_of_Month']
ys=forcastrevpipe_pivot1['Revenue_Forecast']
#for x,y in zip(xs,ys):

#    label = "{:,.0f}".format(y)# + "K"

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='black',
#                 ha='center') # horizontal alignment can be left, right or center

#sns.lineplot(x= 'End_of_Month', y= 'forecast', #hue='Studio', style='Studio',
#             data=combine_histforcast, color="r", ci=None)
#ys=combine_histforcast['forecast']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#ax.legend(['Revenue', 'Revenue forecast'], facecolor='w')
plt.axvline(x=current_eom, linewidth=2, color='r')
ax.set_title(label="Revenue and Forecast by Studio, Months", fontsize=20)
plt.show()

In [None]:
values = forcastrevpipe_pivot1['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
g = sns.catplot(x="End_of_Month", y="Revenue_Forecast", col="Studio", 
                col_wrap=2, legend=True, margin_titles=True,
                data=forcastrevpipe_pivot1, kind="bar", ci=None,
                height=5, aspect=2.5, 
                palette=clrs
               ).set_xticklabels(rotation=-45)
#g.map(plt.axhline, y=today_eom, ls='--', c='red')
g.set_xticklabels(rotation=-45)
plt.subplots_adjust(top=0.95)
plt.suptitle('Revenue and Forecast',fontsize=24)
#axes = g.axes
#for p in axes.patches:
#    axes.annotate('{:.0f}'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
#               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()

In [None]:
#Combine with diff_mean2
diff_mean1 = pd.read_csv('diff_mean.csv')
#forcastrevpipe1['Revenue_Forecast'] = round(forcastrevpipe1['Revenue_Forecast']/1000,3)
diff_mean1['Relative_Month_Offset'] = pd.to_numeric(diff_mean1['Relative_Offset'].str[-3:])*-1
diff_mean2 = pd.merge(forcastrevpipe_pivot1, diff_mean1, how='left', on=['Relative_Month_Offset','Studio'])
diff_mean2['New_Forecast'] = (diff_mean2[['Revenue_Forecast','Pipeline']].max(axis=1))*(1+diff_mean2['Mean'])
diff_mean2 = diff_mean2.replace(np.nan, 0, regex=True)
#write to file
#diff_mean2.to_csv("diff_mean2.csv", index=False)
display("diff_mean2 info:")
show_stats(diff_mean2)
diff_mean2.query('Studio==@entity_debug')

In [None]:
values = diff_mean2['Relative_Month_Offset'].unique()
clrs = ['grey' if (x < 0) else 'g' for x in values ]
g = sns.catplot(x="Relative_Month_Offset", y="New_Forecast",# col="Studio", 
                #col_wrap=2, legend=True, margin_titles=True,
                data=diff_mean2, kind="bar", ci=None,
                height=10, aspect=2.5, 
                palette=clrs
               ).set_xticklabels(rotation=-45)
#g.map(plt.axhline, y=today_eom, ls='--', c='red')
g.set_xticklabels(rotation=-45)
plt.subplots_adjust(top=0.95)
plt.suptitle('Revenue and Forecast',fontsize=24)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.0f}'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()

In [None]:
Combine with input dataset
#df.columns = df.columns.astype(str).str.replace(" ", "_")
combine_df = pd.merge(df3, df, how='inner', on=['End_of_Month','Studio'])
combine_df2 = combine_df.copy()

combine_df2["Pipeline000-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline000-"])
combine_df2["Pipeline000-_Diff%"] = round(combine_df2["Pipeline000-_Diff"]/combine_df2["Pipeline000-"],4)
combine_df2["Pipeline001-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline001-"])
combine_df2["Pipeline001-_Diff%"] = round(combine_df2["Pipeline001-_Diff"]/combine_df2["Pipeline001-"],4)
combine_df2["Pipeline002-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline002-"])
combine_df2["Pipeline002-_Diff%"] = round(combine_df2["Pipeline002-_Diff"]/combine_df2["Pipeline002-"],4)
combine_df2["Pipeline003-_Diff"] = (combine_df2['forecast']-combine_df2["Pipeline003-"])
combine_df2["Pipeline003-_Diff%"] = round(combine_df2["Pipeline003-_Diff"]/combine_df2["Pipeline003-"],4)
combine_df2 = combine_df2.replace(np.nan, 0, regex=True)
#combine_df2 = combine_df2.sort_index(axis=1)
#write to file
combine_df2.to_csv("revforecastpredicted.csv", index=False)
display("combine_df2 info:")
show_stats(combine_df2)
display(combine_df2)

In [None]:
combine_df2.query('Relative_Month_Offset==0').pivot_table(index=['End_of_Month', 'Studio'],# columns=[],
                       values=["forecast", "Pipeline000-", "Pipeline000-_Diff", "Pipeline000-_Diff%",
                              "Pipeline001-", "Pipeline001-_Diff", "Pipeline001-_Diff%",
                              "Pipeline002-", "Pipeline002-_Diff", "Pipeline002-_Diff%",
                              "Pipeline003-", "Pipeline003-_Diff", "Pipeline003-_Diff%"
                              ],
                       aggfunc={"forecast": np.sum, "Pipeline000-": np.sum, "Pipeline000-_Diff": np.sum, "Pipeline000-_Diff%": np.mean,
                                "Pipeline001-": np.sum, "Pipeline001-_Diff": np.sum, "Pipeline001-_Diff%": np.mean,
                                "Pipeline002-": np.sum, "Pipeline002-_Diff": np.sum, "Pipeline002-_Diff%": np.mean,
                                "Pipeline003-": np.sum, "Pipeline003-_Diff": np.sum, "Pipeline003-_Diff%": np.mean
                               },
                       margins=True)#.reset_index()

In [None]:
#Set themes
import matplotlib.pyplot as plt
#plot
sns.set()
sns.set_context("talk") #In order of relative size they are: paper, notebook, talk, and poster. Notebook is default
sns.set(style="whitegrid") #Seaborn has five built-in themes to style its plots: darkgrid, whitegrid, dark, white, and ticks. 
# Save a palette to a variable:
palette = sns.color_palette("Accent",8)
# Use palplot and pass in the variable:
sns.palplot(palette)
# Set the palette using the name of a palette:
sns.set_palette(palette)

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline000-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline000-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline000-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline000-','Pipeline000-_Diff'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline000- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline001-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline001-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline001-_Diff'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline001- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline001-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline001-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline001-_Diff'], facecolor='w')
ax.set_title(label="Next Month Forecast, Pipeline001- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,10))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline002-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline002-_Diff', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="r", ci=None)
ys=combine_df2_filter0['Pipeline002-_Diff']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='r',
                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline002-','Pipeline002-_Diff'], facecolor='w')
ax.set_title(label="Next to next Month Forecast, Pipeline002- and Difference for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
g=sns.catplot(x="Studio", y="Pipeline000-_Diff", hue="End_of_Month", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None,
                height=12, aspect=2, palette=palette).set_xticklabels(rotation=-45)
g.set(title="Pipeline Diff by Studio, Month")
axes = g.ax
#axes.set_ylim(0,1500000)

In [None]:
g = sns.catplot(x="End_of_Month", y="Pipeline000-_Diff", hue="Studio", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None, legend=True,
                height=12, aspect=2, palette=palette)
g.set_xticklabels(rotation=-45)
g.set(title="Pipeline Diff by Month, Studio")
axes = g.ax
#axes.set_ylim(0,150)

In [None]:
#plt.figure(figsize = (24,12))
sns.relplot(data=combine_df2, x="Pipeline000-", y="forecast", hue='Studio', #col="End_of_Month", col_wrap=3,
                  kind="line", palette="Set1", height=5, aspect=1)

In [None]:
sns.catplot(x="Studio", y="forecast", col="End_of_Month", 
                col_wrap=3, legend=True, margin_titles=True,
                data=combine_df2, kind="bar", ci=None,
                height=5, aspect=2.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="End_of_Month", y="forecast", col="Studio", 
                col_wrap=3, legend=True, margin_titles=True,
                data=combine_df2, kind="bar", ci=None,
                height=5, aspect=2.5, palette=palette).set_xticklabels(rotation=-45)
################################

In [None]:
#LONG TO WIDE
#combine_df2_grp = combine_df2.groupby(['Studio','End_of_Month'], as_index=True).agg({"forecast":"sum", "Pipeline":"sum", "Pipeline Diff":"sum", "Pipeline Diff%":"mean"}, margins=True).fillna(0)#.reset_index()
#display(combine_df2_grp)
#combine_df2_pivot = combine_df2.pivot_table(index='Studio', columns='End_of_Month', values=["forecast", "Pipeline000-", "Pipeline000- Diff", "Pipeline000- Diff%"], margins=False)
#combine_df2_pivot.columns.name=None
#combine_df2_pivot = combine_df2_pivot.reset_index()
#pd.set_option('display.float_format', lambda x: '%.1f' % x)
#print("\n combine_df2_pivot\n")
#combine_df2_pivot.to_csv("combine_df2_pivot.csv", index=False)
#combine_df2

In [None]:
# multiple line plot
fig, ax = plt.subplots(figsize=(20,11))
plt.xticks(rotation=-45)

combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
sns.lineplot(x= 'Studio', y= 'forecast', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="g", ci=None, linewidth=4, alpha=0.7)
xs=combine_df2_filter0['Studio']
ys=combine_df2_filter0['forecast']
for x,y in zip(xs,ys):

    label = "{:.0f}".format(y)

    ax.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 color='g',
                 ha='center') # horizontal alignment can be left, right or center
sns.lineplot(x= 'Studio', y= 'Pipeline001-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="b", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline002-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="orange", ci=None)
sns.lineplot(x= 'Studio', y= 'Pipeline003-', #hue='Studio', #style='Studio',
             data=combine_df2_filter0, color="y", ci=None)
#ys=combine_df2_filter0['Pipeline001-_Diff']
#for x,y in zip(xs,ys):

#    label = "{:.0f}".format(y)

#    ax.annotate(label, # this is the text
#                 (x,y), # this is the point to label
#                 textcoords="offset points", # how to position the text
#                 xytext=(0,10), # distance from text to points (x,y)
#                 color='r',
#                 ha='center') # horizontal alignment can be left, right or center
#sns.lineplot(x= 'End_of_Month', y= 'Pipeline002-',
#             data=combine_df2, color="orange") 
ax.legend(['forecast', 'Pipeline001-','Pipeline002-','Pipeline003-'], facecolor='w')
ax.set_title(label="Current Month Forecast, Pipeline001-, 02, 03 for " + combine_df2_filter0['EOM1'].max(), fontsize=20)
plt.show()

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.pairplot(combine_df2_filter0, 
             x_vars=['Pipeline000-', 'Pipeline001-','Pipeline002-','Pipeline003-'], 
             y_vars=['forecast'],
             kind="reg", height=5, palette=palette)
fig=g.fig
#fig.xticklabels(rotation=-45)
fig.subplots_adjust(top=0.93, wspace=0.1)
fig.suptitle("Forecast vs current and previous months Pipeline " + combine_df2_filter0['End_of_Month'].astype(str).max(), size=14, fontweight='bold') 

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline000-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline000- Diff by Studio " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001- Diff by Studio " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001- Diff by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20) 
axes = g.ax
for p in axes.patches:
    axes.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')


In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==0')
g = sns.catplot(x="Studio", y="Pipeline000-_Diff%", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline000-_Diff% by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.1f}%'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter0 = combine_df2.query('Relative_Month_Offset==1')
g = sns.catplot(x="Studio", y="Pipeline001-_Diff%", 
                data=combine_df2_filter0, kind="bar",
                height=8, aspect=2.5, palette=palette)
g.set_xticklabels(rotation=-45)
g.ax.set_title(label="Forecast and Pipeline001-_Diff% by Studio for " + combine_df2_filter0['End_of_Month'].astype(str).max(), fontsize=20)
axes = g.ax
for p in axes.patches:
    axes.annotate('{:.1f}%'.format(100*p.get_height()), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
g=sns.catplot(x="Studio", y="forecast", hue="End_of_Month", 
                #col_wrap=3, legend=True,
                data=combine_df2, kind="bar", ci=None,
                height=12, aspect=2, palette=palette).set_xticklabels(rotation=-45)
g.set(title="Forecast by Studio, Month")
axes = g.ax
#axes.set_ylim(0,1500000)

In [None]:
plt.figure(figsize=(20, 10))
g = sns.barplot(x="Studio", y="forecast", 
            data=df3, palette=palette)
plt.xticks(rotation=-45)
plt.title("Forecast by Studio for Current Month",fontsize =18)
#plt.xlabel ('SQUARE KM',fontsize =24)
for p in g.patches:
    g.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
combine_df2_filter2 = combine_df2.query('Relative_Month_Offset<=2')
sns.catplot(x="forecast", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="Pipeline000-", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)
sns.catplot(x="Pipeline000-_Diff", y="Studio", 
                col="End_of_Month",ci=None,
                data=combine_df2, kind="bar", 
                height=5, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
plt.figure(figsize=(20, 10))
g = sns.barplot(x="Studio", y="forecast", 
            data=df3.query('EOM1=="2020-05-31"', palette=palette)
plt.xticks(rotation=-45)
plt.title("Forecast by Studio for May 2020",fontsize =18)
#plt.xlabel ('SQUARE KM',fontsize =24)
for p in g.patches:
    g.annotate(format(p.get_height(), '.0f'), (p.get_x() + p.get_width() / 2., p.get_height()), 
               ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')

In [None]:
g = sns.catplot(x="forecast", y="Studio", col="End_of_Month", col_wrap=3, 
            data=df3.query('EOM1=="2020-06-30"', kind="bar",
           height=10, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
g = sns.catplot(x="forecast", y="Studio", col="End_of_Month", col_wrap=3, 
            data=df3, kind="bar",
           height=10, aspect=1.5, palette=palette).set_xticklabels(rotation=-45)

In [None]:
g = sns.catplot(x="forecast", y="EOM1", col="Studio", col_wrap=3, 
            data=df3, kind="bar",
           height=10, aspect=1.5, palette=palette)

In [None]:
g=sns.catplot(data=df3_pivot, kind="box",
            height=8, aspect=2.5, palette=palette, orient="h")
#g.ax.set_xlim(-50000,1000000)

In [None]:
g3 = sns.barplot(data=combine_df2.query('Relative_Month_Offset ==0'), y='Studio', x='Pipeline000-_Diff', ci="sd", palette=palette)
g3

In [None]:
g = sns.FacetGrid(combine_df2.query('Relative_Month_Offset <=3'), hue="End_of_Month", col="Studio", palette="Set1", col_wrap=3, margin_titles=True, height=6, aspect=1)
g = g.map(plt.scatter, "Pipeline", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g

In [None]:
#g1 = sns.FacetGrid(combine_df2, hue="Studio", col="End_of_Month", palette="Set1", col_wrap=2, margin_titles=True, height=10, aspect=2)
#g1 = g1.map(plt.scatter, "2020-04-ActRec", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
#g1

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline000-", y="forecast", kind='reg',  height=12)
g2 = sns.jointplot(data=combine_df2, x="Pipeline000-", y="forecast", kind='resid',  height=12)

In [None]:
g3 = sns.FacetGrid(combine_df2, hue="End_of_Month", palette="Set1", margin_titles=True, height=8, aspect=2)
g3 = g3.map(plt.scatter, "Pipeline001-", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g3

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline001-", y="forecast", kind='reg',  height=8)
g2
g2 = sns.jointplot(data=combine_df2, x="Pipeline001-", y="forecast", kind='resid',  height=8)
g2


In [None]:
g3 = sns.FacetGrid(combine_df2, hue="End_of_Month", margin_titles=True, height=8, aspect=2)
g3 = g3.map(plt.scatter, "Pipeline002-", "forecast", s=100, linewidth=.5, edgecolor="white").add_legend()
g3

In [None]:
g2 = sns.jointplot(data=combine_df2, x="Pipeline002-", y="forecast", kind='reg',  height=8)
g2
g2 = sns.jointplot(data=combine_df2, x="Pipeline002-", y="forecast", kind='resid',  height=8)
g2