# Python Script to Get Cumulative CPUs for a Chosen GraphCat ID, Group, and SubGroup

In [1]:
%matplotlib inline
import os
import re
import time
from pathlib import Path

import great_expectations as ge
import ipywidgets as widgets  # library for creating widgets for jupyter notebooks
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # data analysis library
import pyodbc  # used for connecting to ODBC data sources
import seaborn as sns
from IPython.display import display
from ipywidgets import interact, interactive, fixed, interact_manual
from win10toast import ToastNotifier
# Notebook-wide display defaults: a larger default figure size, and DataFrame
# previews that show up to 1000 rows and 100 columns before truncating.
plt.rcParams.update({'figure.figsize': [12, 8]})
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 100)

### RVMS Database Credentials:

In [2]:
# ODBC data source name for the RVMS database.
dsn_rvms = 'HDC-SQLP-RVMS'
# The user id and password are read from environment variables so they never
# appear in the notebook; a missing variable raises KeyError here.
userid_rvms, pw_rvms = os.environ['windowsuser'], os.environ['windowspwd']

### Query to obtain list of GraphCat Groups and their Sub-Groups:

In [3]:
%%time

cnxn_string = 'DSN=' + dsn_rvms + ';UID=' + userid_rvms + ';PWD=' + pw_rvms

# Copy/Paste your SQL text here
sql = """
SELECT
    MQ_GRPS.GRP_ID,
    MQ_GRPS.GRP_NM,
    MQ_SUBGRPS.SUBGRP_ID,
    MQ_SUBGRPS.SUBGRP_NM

FROM
    dbo.tbl_MQ_GRPKEYS AS MQ_GRPKEYS

    LEFT JOIN dbo.tbl_MQ_GRPS AS MQ_GRPS ON
    MQ_GRPKEYS.GRP_ID = MQ_GRPS.GRP_ID

    LEFT JOIN dbo.tbl_MQ_SUBGRPS AS MQ_SUBGRPS ON
    MQ_GRPKEYS.SUBGRP_ID = MQ_SUBGRPS.SUBGRP_ID
    """

# Keep the connection open only for the duration of the query and always close
# it, even when the query raises, so a failed run does not leak the connection.
# No explicit cursor is created: pd.read_sql works on the connection directly.
cnxn = pyodbc.connect(cnxn_string)
try:
    gc_group_subgroup = pd.read_sql(sql, cnxn, index_col=None)

    # For large data (data > RAM), stream the result set in chunks instead:
    # for c in pd.read_sql(sql, cnxn, chunksize=10000):
    #     c.to_csv(r'D:\temp\resultset.csv', index=False, mode='a')
finally:
    cnxn.close()

Wall time: 1.3 s


In [4]:
gc_group_subgroup.head()

Unnamed: 0,GRP_ID,GRP_NM,SUBGRP_ID,SUBGRP_NM
0,1,Chassis,1,ABS/TCS/VSA/BSC
1,1,Chassis,2,Alignment/Drift/SWOC
2,1,Chassis,7,Brake Judder
3,1,Chassis,8,Brake Noise
4,1,Chassis,9,Brake Pipes/hoses


### Create unique list of GraphCat groups and subgroups to be used for GUI controls

In [5]:
# Drop missing names before sorting: the LEFT JOINs in the query can yield NULL
# group names, and sorted() cannot order None against str.
group_list = sorted(gc_group_subgroup['GRP_NM'].dropna().unique().tolist())

In [6]:
# Drop missing names before sorting: the LEFT JOINs in the query can yield NULL
# sub-group names, and sorted() cannot order None against str.
subgroup_list = sorted(gc_group_subgroup['SUBGRP_NM'].dropna().unique().tolist())

In [7]:
group_list

['Chassis',
 'Denso',
 'Engine',
 'Exterior',
 'Interior',
 'OBD',
 'Other',
 'Transmission']

In [8]:
subgroup_list

['ABS/TCS/VSA/BSC',
 'ATF Cooling',
 'Alignment/Drift/SWOC',
 'Audio',
 'Battery',
 'Body',
 'Brake Judder',
 'Brake Noise',
 'Brake Pipes/hoses',
 'Bulbs',
 'Bumper Paint',
 'Bumpers',
 'CVT Trans',
 'Calipers/Disk/Drums/Pads',
 'Catalytic Converter',
 'Center Console',
 'Center Module',
 'Charging System',
 'Clutch',
 'Combimeter',
 'Conventional Brakes (pedal/booster)',
 'Cooling System',
 'Cruise Control',
 'Cup Holder',
 'Cylinder Head',
 'Dampers',
 'Diff, Transf Controls',
 'Door Liner',
 'Door Systems',
 'Drive Shaft',
 'ECU/PCM/TCM',
 'EGR System',
 'Engine Block',
 'Engine Trim',
 'Engine Wiring',
 'Evap System Component',
 'Exhaust Manifold',
 'Exterior Trim',
 'Fender Cladding',
 'Floor Covering/Carpet',
 'Front Light Housing',
 'Front Seats',
 'Fuel Pump',
 'Fuel Sending Unit',
 'Fuel System',
 'Fuel Tank',
 'Gear Shifter/Select Lever',
 'General Trans',
 'Glass',
 'Glove Box',
 'HVAC',
 'HVAC Vents',
 'Hand/cable Brakes',
 'Hard Start',
 'Harness',
 'Headliner',
 'Hood/Fu

### Create a list of GraphCat Descriptions at GraphCat Level

In [9]:
%%time

cnxn_string = 'DSN=' + dsn_rvms + ';UID=' + userid_rvms + ';PWD=' + pw_rvms

# Copy/Paste your SQL text here
sql = """
SELECT
    DISTINCT
    GC_MASTER.GraphCatID,
    GC_MASTER.GraphCatDesc,
    GC_MASTER.GraphCatType,
    CONCAT(CAST(GC_MASTER.GraphCatID as VARCHAR), ' - ', GC_MASTER.GraphCatDesc) AS GraphCat

FROM
    dbo.tbl_GraphCatMaster as GC_MASTER

WHERE
    GC_MASTER.GraphCatType = 'R'
    AND GC_MASTER.GraphCatDesc like 'R %'
    """

# Keep the connection open only for the duration of the query and always close
# it, even when the query raises, so a failed run does not leak the connection.
# No explicit cursor is created: pd.read_sql works on the connection directly.
cnxn = pyodbc.connect(cnxn_string)
try:
    gc_list = pd.read_sql(sql, cnxn, index_col=None)

    # For large data (data > RAM), stream the result set in chunks instead:
    # for c in pd.read_sql(sql, cnxn, chunksize=10000):
    #     c.to_csv(r'D:\temp\resultset.csv', index=False, mode='a')
finally:
    cnxn.close()

Wall time: 281 ms


In [10]:
gc_list.head()

Unnamed: 0,GraphCatID,GraphCatDesc,GraphCatType,GraphCat
0,1,R MAP TL 2001 KA,R,1 - R MAP TL 2001 KA
1,2,R MAP TL 2001 KC,R,2 - R MAP TL 2001 KC
2,3,R MAP TL 2001 KJ,R,3 - R MAP TL 2001 KJ
3,4,R MAP CL 2001 KA,R,4 - R MAP CL 2001 KA
4,5,R MAP CL 2001 KC,R,5 - R MAP CL 2001 KC


### Could not find an easy solution to split delimited string into multiple columns using native SQL Server SQL, so resorted to using Python functions:

In [11]:
def getModelYear(row) -> str:
    """Extract the model year from a row's ``GraphCatDesc`` text.

    The description is split on whitespace. The fourth token is returned when
    it consists only of digits; otherwise the fifth token is returned as-is
    (presumably for descriptions with an extra word before the year -- TODO
    confirm against the data).

    Parameters
    ----------
    row : mapping with a ``'GraphCatDesc'`` string entry (e.g. a DataFrame row).

    Returns
    -------
    str
        The year token, or ``''`` when the description has too few tokens to
        contain one, so that a single malformed description does not abort a
        whole ``DataFrame.apply`` with an IndexError.
    """
    tokens = row['GraphCatDesc'].split()

    if len(tokens) > 3 and tokens[3].isdigit():
        return tokens[3]
    if len(tokens) > 4:
        return tokens[4]
    # Too short to hold a year in either position.
    return ''
        

def getFactoryCode(row) -> str:
    """Return the second whitespace-separated token of ``row['GraphCatDesc']``, upper-cased."""
    return row['GraphCatDesc'].split()[1].upper()

def getModelName(row) -> str:
    """Return the third whitespace-separated token of ``row['GraphCatDesc']``, upper-cased."""
    return row['GraphCatDesc'].split()[2].upper()

### Add Model Year, Factory, and Model Name columns

In [12]:
# Derive one new column per parser by applying it to every row (axis=1 == 'columns').
gc_list['ModelYear'] = gc_list.apply(getModelYear, axis=1)
gc_list['Factory'] = gc_list.apply(getFactoryCode, axis=1)
gc_list['ModelName'] = gc_list.apply(getModelName, axis=1)

In [13]:
gc_list.head()

Unnamed: 0,GraphCatID,GraphCatDesc,GraphCatType,GraphCat,ModelYear,Factory,ModelName
0,1,R MAP TL 2001 KA,R,1 - R MAP TL 2001 KA,2001,MAP,TL
1,2,R MAP TL 2001 KC,R,2 - R MAP TL 2001 KC,2001,MAP,TL
2,3,R MAP TL 2001 KJ,R,3 - R MAP TL 2001 KJ,2001,MAP,TL
3,4,R MAP CL 2001 KA,R,4 - R MAP CL 2001 KA,2001,MAP,CL
4,5,R MAP CL 2001 KC,R,5 - R MAP CL 2001 KC,2001,MAP,CL


### Get input from user to limit GraphCats to certain model years and models

In [14]:
# All spaces are stripped from both answers; model names are upper-cased.
years_str = input("Enter one or more model years separated by comma: ").replace(' ', '')
models_str = input("Enter one or more model names separated by comma: ").replace(' ', '').upper()

# Skip empty entries (doubled or trailing commas): an empty model name would
# otherwise become an empty regex alternative that matches every row.
years_list = [year for year in years_str.split(',') if year]
models_list = [model for model in models_str.split(',') if model]

Enter one or more model years separated by comma: 2016,2017,2018
Enter one or more model names separated by comma: civic


In [15]:
# Build an "any of these names" regular expression. Each name is escaped so
# that regex metacharacters typed by the user are matched literally.
models = '|'.join(re.escape(name) for name in models_list)

In [16]:
# Boolean row masks: the model name must match the `models` regular expression
# somewhere in the string, and the model year must be one of the requested years.
model_criteria = gc_list['ModelName'].str.contains(pat=models, regex=True)
year_criteria = gc_list['ModelYear'].isin(values=years_list)

In [17]:
# Keep only the rows that satisfy both the year and the model filter.
gc_list_final = gc_list.loc[year_criteria & model_criteria]

In [18]:
# Plain Python list of the '<id> - <description>' labels of the filtered GraphCats.
graphcats = gc_list_final.loc[:, 'GraphCat'].tolist()

In [19]:
graphcats

['1175 - R HMI CIVIC 2016 4DR KA',
 '1176 - R HCM CIVIC 2016 2DR KA',
 '1177 - R HCM CIVIC 2016 2DR KC',
 '1178 - R HCM CIVIC 2016 4DR KA',
 '1179 - R HCM CIVIC 2016 4DR KC',
 '1353 - R HCM CIVIC 2017 4DR KA',
 '1354 - R HCM CIVIC 2017 2DR KA',
 '1355 - R HCM CIVIC 2017 4DR KC',
 '1356 - R HCM CIVIC 2017 2DR KC',
 '1358 - R HMI CIVIC 2017 4DR KA',
 '1480 - R HMI CIVIC 2018 4DR KA',
 '1484 - R HCM CIVIC 2018 4DR KA',
 '1485 - R HCM CIVIC 2018 2DR KA',
 '1494 - R HCM CIVIC 2018 2DR KC',
 '1495 - R HCM CIVIC 2018 4DR KC']

### Query that obtains cumulative Actual CPU and Budgeted CPU by Sale-Month/MFSS at GraphCat ```SubGroup Level```:

Function that executes an SQL query and then stores the results in a pandas DataFrame.  The data for a chart of cumulative actual and budgeted CPU is then prepared in the cells below.

In [20]:
%%time

"""
gc_id_entry = input("Enter one or more GraphCat IDs separated by comma: ")
gc_id_list = [gc.strip() for gc in gc_id_entry.split(',')]
number_of_gc = len(gc_id_list)
"""
# Multi-select list of the GraphCats found above; every entry starts selected.
# The widget has its own name so that it does not rebind `gc_list`, which holds
# the GraphCat DataFrame used by earlier cells (re-running them would break).
gc_selector = widgets.SelectMultiple(
    options=graphcats,
    index=list(range(len(graphcats))),
    rows=20,
    description='GraphCats',
    disabled=False
)
display(gc_selector)

months_list = ['60 months', '96 months', '180 months']


def createActualvsBudgetedCpuSubGroup(group=group_list, subgroup=subgroup_list, months=months_list):
    """Query cumulative actual and budgeted CPU for the selected GraphCats.

    Reads the current selection of the module-level ``gc_selector`` widget,
    runs a parameterized query restricted to those GraphCat IDs and to the
    given group / sub-group names, and stores the result in the global
    DataFrame ``cum_actual_budgeted_cpu``.

    Parameters
    ----------
    group : value bound to ``MQ_GRPS.GRP_NM`` in the query.
    subgroup : value bound to ``MQ_SUBGRPS.SUBGRP_NM`` in the query.
    months : accepted so a matching control is offered, but not used by the query.

    Raises
    ------
    ValueError
        If no GraphCat is selected (the query would otherwise contain an
        empty, invalid ``IN()`` list).
    """
    global cum_actual_budgeted_cpu

    selected_graphcats = gc_selector.value
    if not selected_graphcats:
        raise ValueError('Select at least one GraphCat before running the query.')

    # Each selected option starts with the numeric GraphCat ID followed by a
    # space; only that ID is passed to the query.
    parameter_list = [int(entry.split(' ')[0]) for entry in selected_graphcats]
    parameter_list.append(group)
    parameter_list.append(subgroup)

    # Copy/Paste your SQL text here
    # The %s placeholder is expanded to one '?' per selected GraphCat.
    sql = """
    WITH TEMP AS (

    SELECT
        GC_Budget.SaleMonth,
        CONCAT(GC_Master.GraphCatDesc, '-', MQ_GRPS.GRP_NM, '-', MQ_SUBGRPS.SUBGRP_NM) as GraphCat,
        CummActual_CPU as CumActual_CPU,
        CummBudgeted_Cpu as CumBudgeted_CPU

    FROM dbo.tbl_GraphCat_BudgetedMatrix as GC_Budget

        LEFT JOIN dbo.tbl_GC_GRPS AS GC_GRPS ON
        GC_Budget.GC_GRP_ID = GC_GRPS.GC_GRP_ID

        LEFT JOIN dbo.tbl_MQ_GRPKEYS AS MQ_GRPKEYS ON
        GC_GRPS.GRPKEY_ID = MQ_GRPKEYS.GRPKEY_ID

        LEFT JOIN dbo.tbl_MQ_GRPS AS MQ_GRPS ON
        MQ_GRPKEYS.GRP_ID = MQ_GRPS.GRP_ID

        LEFT JOIN dbo.tbl_MQ_SUBGRPS AS MQ_SUBGRPS ON
        MQ_GRPKEYS.SUBGRP_ID = MQ_SUBGRPS.SUBGRP_ID

        LEFT JOIN dbo.tbl_GraphCatMaster as GC_Master ON
        GC_GRPS.GraphCatID = GC_Master.GraphCatID


    WHERE
        GC_Master.GraphCatID IN(%s)
        and MQ_GRPS.GRP_NM = ?
        and MQ_SUBGRPS.SUBGRP_NM = ?

    )

    SELECT
        SaleMonth,
        GraphCat,
        /**
        CASE
            WHEN CumActual_CPU = 0 THEN NULL
        ELSE
            CumActual_CPU
        END AS CumActual_CPU,  **/
        CumActual_CPU,
        CumBudgeted_CPU

    FROM
        TEMP

    ORDER BY
        SaleMonth
        """ % (",".join("?" * len(selected_graphcats)))

    cnxn_string = 'DSN=' + dsn_rvms + ';UID=' + userid_rvms + ';PWD=' + pw_rvms

    # Always close the connection, even when the query raises.
    cnxn = pyodbc.connect(cnxn_string)
    try:
        cum_actual_budgeted_cpu = pd.read_sql(sql, cnxn, index_col=None, params=parameter_list)

        # For large data (data > RAM), stream the result set in chunks instead:
        # for c in pd.read_sql(sql, cnxn, chunksize=10000):
        #     c.to_csv(r'D:\temp\resultset.csv', index=False, mode='a')
    finally:
        cnxn.close()


interact_manual(createActualvsBudgetedCpuSubGroup);

SelectMultiple(description='GraphCats', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), options=('11…

interactive(children=(Dropdown(description='group', options=('Chassis', 'Denso', 'Engine', 'Exterior', 'Interi…

Wall time: 90.2 ms


In [22]:
cum_actual_budgeted_cpu

Unnamed: 0,SaleMonth,GraphCat,CumActual_CPU,CumBudgeted_CPU
0,2015-11,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.0643,0.381298
1,2015-12,R HCM CIVIC 2016 4DR KA-Engine-HVAC,-0.1657,0.625610
2,2016-01,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.1612,0.766260
3,2016-02,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.2613,0.954501
4,2016-03,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.4748,1.100612
5,2016-04,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.6311,1.314289
6,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC,0.0000,0.737366
7,2016-05,R HCM CIVIC 2016 4DR KA-Engine-HVAC,0.9709,1.508744
8,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC,0.6613,1.198762
9,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC,1.0168,1.641002


In [23]:
# One row per SaleMonth and one (measure, GraphCat) column per combination;
# duplicate (SaleMonth, GraphCat) rows are collapsed with max.
pivoted = pd.pivot_table(
    cum_actual_budgeted_cpu,
    index=['SaleMonth'],
    columns=['GraphCat'],
    values=['CumActual_CPU', 'CumBudgeted_CPU'],
    aggfunc='max',
)

In [24]:
pivoted

Unnamed: 0_level_0,CumActual_CPU,CumActual_CPU,CumActual_CPU,CumActual_CPU,CumActual_CPU,CumActual_CPU,CumBudgeted_CPU,CumBudgeted_CPU,CumBudgeted_CPU,CumBudgeted_CPU,CumBudgeted_CPU,CumBudgeted_CPU
GraphCat,R HCM CIVIC 2016 2DR KA-Engine-HVAC,R HCM CIVIC 2016 4DR KA-Engine-HVAC,R HCM CIVIC 2017 2DR KA-Engine-HVAC,R HCM CIVIC 2017 4DR KA-Engine-HVAC,R HCM CIVIC 2018 2DR KA-Engine-HVAC,R HCM CIVIC 2018 4DR KA-Engine-HVAC,R HCM CIVIC 2016 2DR KA-Engine-HVAC,R HCM CIVIC 2016 4DR KA-Engine-HVAC,R HCM CIVIC 2017 2DR KA-Engine-HVAC,R HCM CIVIC 2017 4DR KA-Engine-HVAC,R HCM CIVIC 2018 2DR KA-Engine-HVAC,R HCM CIVIC 2018 4DR KA-Engine-HVAC
SaleMonth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2015-11,,0.0643,,,,,,0.381298,,,,
2015-12,,-0.1657,,,,,,0.62561,,,,
2016-01,,0.1612,,,,,,0.76626,,,,
2016-02,,0.2613,,,,,,0.954501,,,,
2016-03,,0.4748,,,,,,1.100612,,,,
2016-04,0.0,0.6311,,,,,0.737366,1.314289,,,,
2016-05,0.6613,0.9709,,,,,1.198762,1.508744,,,,
2016-06,1.0168,1.4499,,,,,1.641002,1.711718,,,,
2016-07,1.5463,1.7317,,,,,1.954412,1.873871,,,,
2016-08,2.3047,2.124,,,,,2.149567,2.039246,,,,


### From above, we see that the Actual and Budgeted CPU header is at the top level and we want them to be actually appended to the lower level headers

#### We want the actual and budgeted header to be part of the graphcat description header, so how do we concatenate them together?

In [25]:
pivoted.columns.get_level_values(0).tolist()

['CumActual_CPU',
 'CumActual_CPU',
 'CumActual_CPU',
 'CumActual_CPU',
 'CumActual_CPU',
 'CumActual_CPU',
 'CumBudgeted_CPU',
 'CumBudgeted_CPU',
 'CumBudgeted_CPU',
 'CumBudgeted_CPU',
 'CumBudgeted_CPU',
 'CumBudgeted_CPU']

In [26]:
pivoted.columns.get_level_values(1).tolist()

['R HCM CIVIC 2016 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2016 4DR KA-Engine-HVAC',
 'R HCM CIVIC 2017 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2017 4DR KA-Engine-HVAC',
 'R HCM CIVIC 2018 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2018 4DR KA-Engine-HVAC',
 'R HCM CIVIC 2016 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2016 4DR KA-Engine-HVAC',
 'R HCM CIVIC 2017 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2017 4DR KA-Engine-HVAC',
 'R HCM CIVIC 2018 2DR KA-Engine-HVAC',
 'R HCM CIVIC 2018 4DR KA-Engine-HVAC']

#### SOLUTION: Use Python's join function!

In [27]:
# Build '<GraphCat> <measure>' labels by pairing the two column levels directly.
# No join/split round trip on ':' is involved, so a ':' inside a name cannot
# split one label into two.
new_column_names = [
    graphcat + ' ' + measure
    for measure, graphcat in zip(pivoted.columns.get_level_values(0).tolist(),
                                 pivoted.columns.get_level_values(1).tolist())
]

In [28]:
new_column_names

['R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU',
 'R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU',
 'R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU',
 'R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU',
 'R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU',
 'R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU',
 'R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU']

#### From above, we now have column headers labels we wish to have!  But...

In [29]:
pivoted.columns

MultiIndex(levels=[['CumActual_CPU', 'CumBudgeted_CPU'], ['R HCM CIVIC 2016 2DR KA-Engine-HVAC', 'R HCM CIVIC 2016 4DR KA-Engine-HVAC', 'R HCM CIVIC 2017 2DR KA-Engine-HVAC', 'R HCM CIVIC 2017 4DR KA-Engine-HVAC', 'R HCM CIVIC 2018 2DR KA-Engine-HVAC', 'R HCM CIVIC 2018 4DR KA-Engine-HVAC']],
           codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]],
           names=[None, 'GraphCat'])

#### ...we need to remove the top level (0) from the dataframe's ```columns``` attribute and then substitute the column names with the desired/new column names:

In [30]:
# Assigning the flat labels replaces the whole two-level column index in one
# step. No droplevel() is needed first, which also keeps this cell re-runnable
# once the columns are already flat.
pivoted.columns = new_column_names

In [31]:
pivoted

Unnamed: 0_level_0,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU
SaleMonth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-11,,0.0643,,,,,,0.381298,,,,
2015-12,,-0.1657,,,,,,0.62561,,,,
2016-01,,0.1612,,,,,,0.76626,,,,
2016-02,,0.2613,,,,,,0.954501,,,,
2016-03,,0.4748,,,,,,1.100612,,,,
2016-04,0.0,0.6311,,,,,0.737366,1.314289,,,,
2016-05,0.6613,0.9709,,,,,1.198762,1.508744,,,,
2016-06,1.0168,1.4499,,,,,1.641002,1.711718,,,,
2016-07,1.5463,1.7317,,,,,1.954412,1.873871,,,,
2016-08,2.3047,2.124,,,,,2.149567,2.039246,,,,


In [32]:
pivoted.shape

(253, 12)

In [33]:
# Turn the SaleMonth index back into a regular column.
pivoted = pivoted.reset_index(level=0)

In [34]:
pivoted.head()

Unnamed: 0,SaleMonth,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU
0,2015-11,,0.0643,,,,,,0.381298,,,,
1,2015-12,,-0.1657,,,,,,0.62561,,,,
2,2016-01,,0.1612,,,,,,0.76626,,,,
3,2016-02,,0.2613,,,,,,0.954501,,,,
4,2016-03,,0.4748,,,,,,1.100612,,,,


In [35]:
# Back to long format: one row per (SaleMonth, column label); rows containing NaN are dropped.
cpu_long = pivoted.melt(id_vars=['SaleMonth']).dropna()

In [36]:
cpu_long

Unnamed: 0,SaleMonth,variable,value
5,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.000000
6,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.661300
7,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.016800
8,2016-07,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.546300
9,2016-08,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.304700
10,2016-09,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.804200
11,2016-10,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.986500
12,2016-11,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.227000
13,2016-12,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.067400
14,2017-01,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.243000


In [37]:
# Give the melted columns meaningful names.
cpu_long = cpu_long.rename(columns={'variable': 'GraphCat', 'value': 'Cum_CPU'})

In [38]:
cpu_long.head(10)

Unnamed: 0,SaleMonth,GraphCat,Cum_CPU
5,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.0
6,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.6613
7,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.0168
8,2016-07,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.5463
9,2016-08,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.3047
10,2016-09,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.8042
11,2016-10,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.9865
12,2016-11,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.227
13,2016-12,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.0674
14,2017-01,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.243


In [39]:
cpu_long.columns

Index(['SaleMonth', 'GraphCat', 'Cum_CPU'], dtype='object')

In [40]:
# Order rows by GraphCat label, then by SaleMonth within each GraphCat.
cpu_long = cpu_long.sort_values(by=['GraphCat', 'SaleMonth'])

In [41]:
# Create MFSS values: start by counting the rows available per GraphCat label.
grouped = cpu_long.loc[:, 'GraphCat'].value_counts()

In [42]:
grouped

R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU      226
R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU    226
R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU    221
R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU      221
Name: GraphCat, dtype: int64

In [43]:
# Ensure the groups are sorted by GraphCat exactly as they are sorted in cpu_long
grouped = grouped.sort_index()
# One run 0 .. count-1 per GraphCat, in that same order.
new_index_list = [list(range(row_count)) for row_count in grouped.values]

In [44]:
grouped

R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU      221
R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU    221
R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU      226
R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU    226
R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU    229
R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU      229
R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU    229
Name: GraphCat, dtype: int64

### Create new dataframe index that matches what we call "MFSS"

In [45]:
# Flatten the per-GraphCat runs into one list in a single pass, instead of
# re-copying the growing list on every concatenation.
new_index = [mfss for run in new_index_list for mfss in run]

In [46]:
new_index

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [47]:
cpu_long

Unnamed: 0,SaleMonth,GraphCat,Cum_CPU
5,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.000000
6,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.661300
7,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.016800
8,2016-07,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.546300
9,2016-08,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.304700
10,2016-09,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.804200
11,2016-10,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.986500
12,2016-11,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.227000
13,2016-12,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.067400
14,2017-01,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.243000


In [48]:
# Replace the row labels with the per-GraphCat counters built above. This relies
# on cpu_long still being in the (GraphCat, SaleMonth) order it was sorted into,
# and on new_index having exactly one entry per row.
cpu_long.index = new_index

In [49]:
cpu_long

Unnamed: 0,SaleMonth,GraphCat,Cum_CPU
0,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.000000
1,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.661300
2,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.016800
3,2016-07,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.546300
4,2016-08,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.304700
5,2016-09,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.804200
6,2016-10,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.986500
7,2016-11,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.227000
8,2016-12,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.067400
9,2017-01,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.243000


In [50]:
# Move the index values into a regular column (named 'index' by pandas) and
# rename that column to MFSS.
cpu_long = cpu_long.reset_index(level=0).rename(columns={'index': 'MFSS'})

In [51]:
cpu_long

Unnamed: 0,MFSS,SaleMonth,GraphCat,Cum_CPU
0,0,2016-04,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.000000
1,1,2016-05,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,0.661300
2,2,2016-06,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.016800
3,3,2016-07,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,1.546300
4,4,2016-08,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.304700
5,5,2016-09,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.804200
6,6,2016-10,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,2.986500
7,7,2016-11,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.227000
8,8,2016-12,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.067400
9,9,2017-01,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,3.243000


In [52]:
# Convert the data from long format to wide format so that we can plot using MATPLOTLIB:
# one row per MFSS value and one Cum_CPU column per GraphCat label.
cpu_wide = (
    cpu_long
    .set_index(['MFSS', 'GraphCat'])['Cum_CPU']
    .unstack('GraphCat')
)

In [53]:
cpu_wide

GraphCat,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2016 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2017 4DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 2DR KA-Engine-HVAC CumBudgeted_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumActual_CPU,R HCM CIVIC 2018 4DR KA-Engine-HVAC CumBudgeted_CPU
MFSS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,0.0,0.737366,0.0643,0.381298,0.0,0.348897,0.0192,0.310039,0.0,0.171166,0.0,0.117554
1,0.6613,1.198762,-0.1657,0.62561,0.0,0.532823,0.1074,0.459011,0.0,0.281465,0.0997,0.167973
2,1.0168,1.641002,0.1612,0.76626,0.0,0.722406,0.0707,0.599896,0.0,0.368827,0.1495,0.242275
3,1.5463,1.954412,0.2613,0.954501,0.0358,0.863783,0.0938,0.698681,0.1169,0.434014,0.2526,0.322911
4,2.3047,2.149567,0.4748,1.100612,0.025,0.944725,0.1435,0.765011,0.076,0.491457,0.2714,0.407671
5,2.8042,2.384165,0.6311,1.314289,0.1834,1.043664,0.2319,0.856145,0.3173,0.569813,0.3394,0.518734
6,2.9865,2.633567,0.9709,1.508744,0.3466,1.132817,0.5213,0.919362,0.6286,0.631982,0.5217,0.634053
7,3.227,2.872256,1.4499,1.711718,0.7236,1.215883,0.8195,1.006793,0.68,0.712947,0.6425,0.775735
8,3.0674,3.082421,1.7317,1.873871,0.8917,1.293341,0.9436,1.059502,0.8203,0.798585,0.8372,0.939818
9,3.243,3.314934,2.124,2.039246,1.06,1.389369,1.0784,1.131319,1.0655,0.887694,1.2848,1.131058


In [54]:
# Write the wide-format chart data to an Excel workbook at a fixed local path.
cpu_wide.to_excel(excel_writer=r'D:\temp\chart_data_subgroup_level.xlsx')