## Prediction dataset
In this notebook we essentially do the exact same procedure that you find in the "final_dataset.ipynb", but for years 2025, 2026 and 2027, which are the "prediction" years, for which we have no target. 

In [1]:
import pandas as pd
import numpy as np

In [16]:
ebitda = pd.read_csv("../clean_data/ebitda_complete.csv")
cogs = pd.read_csv("../clean_data/cogs_complete.csv")
revenue = pd.read_csv("../clean_data/revenue_complete.csv")
market = pd.read_csv("../input_data/market_data.csv")
final_df = pd.read_csv("../clean_data/final_df.csv", index_col=0)
#keep only three years from final_df, which are then replaced by the prediction years
prediction_df = final_df[final_df["Year"]>=2022]
prediction_df = prediction_df.rename(columns={"Value": "CompanySpend"})
prediction_df.replace({2022:2025, 2023:2026, 2024:2027}, inplace=True)


### Financials
for the financial data, we do have predictions until 2027, so we use them. Remember to include 2023 and 2024 as we need that for the lagged financial variables. 

In [17]:
ebitda = ebitda[ebitda["Year"]>=2023]
cogs = cogs[cogs["Year"]>=2023]
revenue = revenue[revenue["Year"]>=2023]

#then we do the company name mapping we did also in the final_dataset notebook
comp_map = pd.read_csv("../input_data/financials_map.csv", delimiter= ",", encoding='unicode_escape', index_col = 0)
replacement_dict = dict(zip(comp_map["Company Tag from S&P Global"], comp_map["Company Name from Spend Tracker"]))
cogs.Company = cogs.Company.replace(replacement_dict)
ebitda.Company = ebitda.Company.replace(replacement_dict)
revenue.Company = revenue.Company.replace(replacement_dict)

In [18]:
#now insert all financial data (with the right year-corresponding observations) in the predictions_df

#cogs
prediction_df.drop("Cogs", axis = 1, inplace=True)
prediction_df=prediction_df.merge(cogs, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Value":"Cogs"}, axis=1)
cogs["Year"] = cogs["Year"]+1
prediction_df.drop("Cogs_1", axis = 1, inplace=True)
prediction_df=prediction_df.merge(cogs, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Value":"Cogs_1"}, axis=1)
cogs["Year"] = cogs["Year"]+1
prediction_df.drop("Cogs_2", axis = 1, inplace=True)
prediction_df=prediction_df.merge(cogs, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Value":"Cogs_2"}, axis=1)

#ebitda
prediction_df.drop("Ebitda", axis = 1, inplace=True)
prediction_df=prediction_df.merge(ebitda, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Ebitda":"ebitda"}, axis=1)
ebitda["Year"] = ebitda["Year"]+1
prediction_df.drop("Ebitda_1", axis = 1, inplace=True)
prediction_df=prediction_df.merge(ebitda, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Ebitda":"Ebitda_1"}, axis=1)
ebitda["Year"] = ebitda["Year"]+1
prediction_df.drop("Ebitda_2", axis = 1, inplace=True)
prediction_df=prediction_df.merge(ebitda, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Ebitda":"Ebitda_2"}, axis=1)

#revenue
prediction_df.drop("Revenue", axis = 1, inplace=True)
prediction_df=prediction_df.merge(revenue, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Revenue":"revenue"}, axis=1)
revenue["Year"] = revenue["Year"]+1
prediction_df.drop("Revenue_1", axis = 1, inplace=True)
prediction_df=prediction_df.merge(revenue, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Revenue":"Revenue_1"}, axis=1)
revenue["Year"] = revenue["Year"]+1
prediction_df.drop("Revenue_2", axis = 1, inplace=True)
prediction_df=prediction_df.merge(revenue, how="left", on=["Company", "Year"])
prediction_df = prediction_df.rename({"Revenue":"Revenue_2"}, axis=1)

prediction_df = prediction_df.rename({"ebitda":"Ebitda", "revenue":"Revenue"}, axis=1)
prediction_df[(prediction_df.Company == "Facebook (Meta)") & (prediction_df.Market == "Consumer")]




Unnamed: 0,Company,Device,Market,SubMarket,Year,CompanySpend,Region,Spendings,MarketSpends,SubMarketSpends,DevSpends,Cogs,Cogs_1,Cogs_2,Ebitda,Ebitda_1,Ebitda_2,Revenue,Revenue_1,Revenue_2
2499,Facebook (Meta),Amplifier/Comparator,Consumer,Other Consumer,2025,2.7093,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,116.0,7755.155,2131.102,5469.003,432.249286,423.256071,414.262857,94700.02,83816.96,70266.82,167677.79,70266.82,133151.09
2500,Facebook (Meta),Amplifier/Comparator,Consumer,Other Consumer,2026,2.0539,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,88.0,6292.004,1757.005,5133.007,441.2425,432.249286,423.256071,106657.76,94700.02,83816.96,184547.58,167677.79,70266.82
2501,Facebook (Meta),Amplifier/Comparator,Consumer,Other Consumer,2027,2.2004,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,93.0,6430.006,1936.006,5225.0,450.235714,441.2425,432.249286,119613.89,106657.76,94700.02,202653.0,184547.58,167677.79
2508,Facebook (Meta),Analog Application Specific IC,Consumer,Other Consumer,2025,6.5614,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,1194.104,7755.155,2131.102,58394.007,432.249286,423.256071,414.262857,94700.02,83816.96,70266.82,167677.79,70266.82,133151.09
2509,Facebook (Meta),Analog Application Specific IC,Consumer,Other Consumer,2026,5.497,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,1039.002,6292.004,1757.005,51983.007,441.2425,432.249286,423.256071,106657.76,94700.02,83816.96,184547.58,167677.79,70266.82
2510,Facebook (Meta),Analog Application Specific IC,Consumer,Other Consumer,2027,5.5946,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,1222.001,6430.006,1936.006,51955.005,450.235714,441.2425,432.249286,119613.89,106657.76,94700.02,202653.0,184547.58,167677.79
2517,Facebook (Meta),Data Converters,Consumer,Other Consumer,2025,4.6802,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,281.0,7755.155,2131.102,5342.999,432.249286,423.256071,414.262857,94700.02,83816.96,70266.82,167677.79,70266.82,133151.09
2518,Facebook (Meta),Data Converters,Consumer,Other Consumer,2026,2.8639,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,170.001,6292.004,1757.005,4949.004,441.2425,432.249286,423.256071,106657.76,94700.02,83816.96,184547.58,167677.79,70266.82
2519,Facebook (Meta),Data Converters,Consumer,Other Consumer,2027,2.5978,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,152.002,6430.006,1936.006,5028.001,450.235714,441.2425,432.249286,119613.89,106657.76,94700.02,202653.0,184547.58,167677.79
2526,Facebook (Meta),Interface,Consumer,Other Consumer,2025,1.3482,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,32.0,7755.155,2131.102,4342.007,432.249286,423.256071,414.262857,94700.02,83816.96,70266.82,167677.79,70266.82,133151.09


### Market

In [19]:
market = market[market["Region"]=="Worldwide"]

In [20]:
mapping_dict = {('Automotive Electronics Categories', 'ADAS'): ('Automotive', 'Auto ADAS'),
    ('Automotive Electronics Categories', 'Body & Convenience'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Chassis & Safety'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Connectivity & Telematics'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Hybrid & Electric Drive Train'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Infotainment & Cluster'): ('Automotive', 'Auto Infotainment'),
    ('Automotive Electronics Categories', 'Other Automotive'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Powertrain & Vehicle Dynamics'): ('Automotive', 'Auto Powertrain'),
    ('Computing & Data Storage Categories', 'Data Center Servers'): ('Computer Platforms', 'Data Center Servers'),
    ('Computing & Data Storage Categories', 'Desktop PCs'): ('Computer Platforms', 'Desktop PCs'),
    ('Computing & Data Storage Categories', 'Flash Storage Cards'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Computing & Data Storage Categories', 'Hard Disk Drives'): ('Computer Peripherals & Storage', 'HDD'),
    ('Computing & Data Storage Categories', 'Notebook PCs'): ('Computer Platforms', 'Notebook PCs'),
    ('Computing & Data Storage Categories', 'Other Computing'): ('Computer Platforms', 'Other Computer Products'),
    ('Computing & Data Storage Categories', 'Other Data Storage'): ('Computer Peripherals & Storage', 'Other Storage'),
    ('Computing & Data Storage Categories', 'Other Peripherals'): ('Computer Peripherals & Storage', 'Other Peripherals'),
    ('Computing & Data Storage Categories', 'Smart Cards'): ('Computer Peripherals & Storage', 'Smart Cards'),
    ('Computing & Data Storage Categories', 'Solid-State Drives'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Computing & Data Storage Categories', 'Tablet PCs'): ('Computer Platforms', 'Tablet PCs'),
    ('Computing & Data Storage Categories', 'USB Flash Drive'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Consumer Electronics Categories', 'Fitness & Wellness Wearable Electronics'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'LCD TV'): ('Consumer', 'TV'),
    ('Consumer Electronics Categories', 'Major Home Appliances'): ('Consumer', 'Appliance'),
    ('Consumer Electronics Categories', 'OLED TV'): ('Consumer', 'TV'),
    ('Consumer Electronics Categories', 'Other Audio/Video'): ('Consumer', 'Audio'),
    ('Consumer Electronics Categories', 'Other Consumer Electronics'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'Set-Top Boxes'): ('Consumer', 'STB'),
    ('Consumer Electronics Categories', 'Smart Speakers & Digital Assistants'): ('Consumer', 'Connected Consumer'),
    ('Consumer Electronics Categories', 'Smart Watches'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'VR Headsets'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'Video Game Consoles'): ('Consumer', 'Video Games'),
    ('Industrial Electronics Categories', 'Automation'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Building & Home Control'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Lighting'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Medical Electronics'): ('Industrial', 'Medical'),
    ('Industrial Electronics Categories', 'Military & Civil Aerospace'): ('Industrial', 'Military/Aerospace'),
    ('Industrial Electronics Categories', 'Other Industrial'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Power & Energy'): ('Industrial', 'Power & Energy'),
    ('Industrial Electronics Categories', 'Security & Video Surveillance'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Test & Measurement'): ('Industrial', 'Test & Measurement'),
    ('Industrial Electronics Categories', 'Other Industrial') : ('Industrial', 'Manufacturing Equipment'),
    ('Wired Communications Categories', 'Broadcast & Streaming Video'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Cable Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Cable CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Carrier Ethernet Switches & Routers'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'DSL Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'DSL CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Data Center Network Switches'): ('Wired Communications', 'DC Network & Threat Mitigation'),
    ('Wired Communications Categories', 'Enterprise Ethernet Switches & Routers'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'Enterprise UC & Voice'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'FTTH Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'FTTH CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Low-Tier Consumer/SOHO Routers'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'Optical Equipment'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Other Wired Communications'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Threat Mitigation Products'): ('Wired Communications', 'DC Network & Threat Mitigation'),
    ('Wireless Communications Categories', 'Gray Market Handsets'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'High-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Low-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'M2M Modules'): ('Wireless Communications', 'Other Wireless'),
    ('Wireless Communications Categories', 'Media Tablets'): ('Wireless Communications', 'Media Tablets'),
    ('Wireless Communications Categories', 'Mid-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Mobile Comm Infrastructure'): ('Wireless Communications', 'Infrastructure'),
    ('Wireless Communications Categories', 'Mobile Phone (ULCH, Entry, Feature)'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Other Wireless Communications'): ('Wireless Communications', 'Other Wireless'),
    ('Wireless Communications Categories', 'Wireless LAN Equipment'): ('Wireless Communications', 'Infrastructure')}

In [21]:
#Rename the market and submarket
def remapping(market, submarket):
    key = (market, submarket)
    return mapping_dict[key][0], mapping_dict[key][1]

market.Market, market.Submarket = zip(*market.apply(lambda x : remapping(x.Market, x.Submarket), axis = 1))
market.rename(columns = {"Submarket" : "SubMarket"}, inplace = True)
market.drop("Region", axis=1, inplace = True)

In [22]:
market = market.groupby(["Market", "SubMarket", "Device", "Year"]).sum().reset_index()
market[(market.Market == "Consumer") & (market.SubMarket == "Other Consumer") & (market.Device == "Interface")]

Unnamed: 0,Market,SubMarket,Device,Year,Market size
1760,Consumer,Other Consumer,Interface,2008,101.281
1761,Consumer,Other Consumer,Interface,2009,85.324
1762,Consumer,Other Consumer,Interface,2010,87.143
1763,Consumer,Other Consumer,Interface,2011,83.143
1764,Consumer,Other Consumer,Interface,2012,47.688
1765,Consumer,Other Consumer,Interface,2013,37.962
1766,Consumer,Other Consumer,Interface,2014,31.664
1767,Consumer,Other Consumer,Interface,2015,25.628
1768,Consumer,Other Consumer,Interface,2016,15.821
1769,Consumer,Other Consumer,Interface,2017,24.251


In [23]:
marketspendings = market[["Market", "Year", "Market size"]].groupby(["Market", "Year"]).sum().reset_index()
submarketspendings = market[["SubMarket", "Year", "Market size"]].groupby(["SubMarket", "Year"]).sum().reset_index()
devicespendings = market[["Device", "Year", "Market size"]].groupby(["Device", "Year"]).sum().reset_index()

In [25]:
#merge prediction_df with the market data which is now mapped
prediction_df = prediction_df.drop("Spendings", axis = 1)
prediction_df=prediction_df.merge(market, how="left", on=["Market", "SubMarket", "Device", "Year"])
prediction_df = prediction_df.rename({"Market size":"Spendings"}, axis=1)

prediction_df.drop(["DevSpends", "MarketSpends", "SubMarketSpends"], axis=1, inplace=True)
prediction_df = prediction_df.merge(marketspendings, how = 'left', on = ["Market", "Year"])
prediction_df.rename(columns = {"Market size" : "MarketSpends"}, inplace = True)
prediction_df = prediction_df.merge(submarketspendings, how = 'left', on = ["SubMarket", "Year"])
prediction_df.rename(columns = {"Market size" : "SubMarketSpends"}, inplace = True)
prediction_df = prediction_df.merge(devicespendings, how = 'left', on = ["Device", "Year"])
prediction_df.rename(columns = {"Market size" : "DevSpends"}, inplace = True)
prediction_df

Unnamed: 0,Company,Device,Market,SubMarket,Year,CompanySpend,Region,Cogs,Cogs_1,Cogs_2,Ebitda,Ebitda_1,Ebitda_2,Revenue,Revenue_1,Revenue_2,Spendings,MarketSpends,SubMarketSpends,DevSpends
0,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2025,13.879158,AmericasAsia & Oceania (exc. Japan)EMEAJapan,10364.717143,10122.197857,9879.678571,6271.24,5943.65,5807.680000,34510.78,32970.35,32174.52,136.463,18718.436,1286.700,5641.009
1,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2026,16.234860,AmericasAsia & Oceania (exc. Japan)EMEAJapan,10607.236429,10364.717143,10122.197857,6997.00,6271.24,5943.650000,36835.00,34510.78,32970.35,143.282,20062.488,1354.429,5829.007
2,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2027,15.408600,AmericasAsia & Oceania (exc. Japan)EMEAJapan,10849.755714,10607.236429,10364.717143,7210.00,6997.00,6271.240000,38700.25,36835.00,34510.78,152.239,21590.113,1439.354,6035.001
3,ABB,Amplifier/Comparator,Industrial,Power & Energy,2025,10.040442,AmericasAsia & Oceania (exc. Japan)EMEAJapan,10364.717143,10122.197857,9879.678571,6271.24,5943.65,5807.680000,34510.78,32970.35,32174.52,349.156,18718.436,3921.473,5641.009
4,ABB,Amplifier/Comparator,Industrial,Power & Energy,2026,10.927740,AmericasAsia & Oceania (exc. Japan)EMEAJapan,10607.236429,10364.717143,10122.197857,6997.00,6271.24,5943.650000,36835.00,34510.78,32970.35,387.420,20062.488,4447.796,5829.007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9319,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Infrastructure,2026,2.249966,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,9638.288929,8887.313571,8136.338214,1360.00,1171.03,916.760000,5980.00,5260.88,4671.88,245.002,28785.672,3150.005,19135.005
9320,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Infrastructure,2027,2.293834,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,10389.264286,9638.288929,8887.313571,1493.00,1360.00,1171.030000,6364.00,5980.00,5260.88,249.001,28472.544,3126.003,20054.011
9321,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2025,1.229600,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,8887.313571,8136.338214,7385.362857,1171.03,916.76,3668.885714,5260.88,4671.88,4536.76,498.000,29447.329,4771.001,18326.012
9322,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2026,1.182734,AmericasAsia & Oceania (exc. Japan)EMEAJapanAm...,9638.288929,8887.313571,8136.338214,1360.00,1171.03,916.760000,5980.00,5260.88,4671.88,505.001,28785.672,4675.999,19135.005


In [158]:
prediction_df.to_csv("../clean_data/prediction_df.csv")

### New Target
Once again, re-do the whole thing for the new target dataset, being mindful of lagged variables to be used, thus of which years we need to keep. 

In [88]:
#for instance, here because it's a log differential model, we keep 4 years from the whole dataset (from 2021 instead of from 2022), which we will then transform into 2024, 2025, 2026, 2027
new_pred = pd.read_csv("../clean_data/newtarget_df.csv", index_col=0)
new_pred = new_pred[new_pred["Year"]>=2021]
new_pred.drop('Target', axis = 1, inplace = True)
new_pred.replace({2021:2024, 2022:2025, 2023:2026, 2024:2027}, inplace=True)
new_pred = new_pred.iloc[:, :5]

#### Financial Data

In [89]:
comp_map = pd.read_csv("../input_data/financials_map.csv", delimiter= ",", encoding='unicode_escape', index_col = 0)
comp_map
comp_map = comp_map.set_index("Company Tag from S&P Global").to_dict()["Company Name from Spend Tracker"]
companies_available = comp_map.keys()

In [90]:
def mapping_companies(x):
    if x in companies_available:
        return comp_map[x]
    else :
        return None

In [91]:
#ebitda
ebitda = pd.read_csv("../clean_data/ebitda_complete.csv")
ebitda.Company = ebitda.Company.apply(mapping_companies)
ebitda = ebitda[~ebitda['Company'].isnull()]
ebitda["Ebitda_1"] = ebitda.groupby('Company')['Ebitda'].shift(1)
ebitda["Ebitda_2"] = ebitda.groupby('Company')['Ebitda'].shift(2)
remove_ebitda = ebitda[(ebitda.Ebitda_2 < 0) | (ebitda.Ebitda < 0) | (ebitda.Ebitda_1 < 0)].Company.unique()

#cogs
cogs = pd.read_csv("../clean_data/cogs_complete.csv")
cogs.Company = cogs.Company.apply(mapping_companies)
cogs = cogs[~cogs['Company'].isnull()]
cogs.rename(columns = {"Value" : "Cogs"}, inplace = True)
cogs["Cogs_1"] = cogs.groupby('Company')['Cogs'].shift(1)
cogs["Cogs_2"] = cogs.groupby('Company')['Cogs'].shift(2)
remove_cogs = cogs[(cogs.Cogs_2 < 0) | (cogs.Cogs < 0) | (cogs.Cogs_1 < 0)].Company.unique()
remove_cogs

#revenue
revenue = pd.read_csv("../clean_data/revenue_complete.csv")
revenue.Company = revenue.Company.apply(mapping_companies)
revenue = revenue[~revenue['Company'].isnull()]
revenue["Revenue_1"] = revenue.groupby('Company')['Revenue'].shift(1)
revenue["Revenue_2"] = revenue.groupby('Company')['Revenue'].shift(2)
remove_revenue = revenue[(revenue.Revenue_2 < 0) | (revenue.Revenue < 0) | (revenue.Revenue_1 < 0)].Company.unique()



In [92]:
new_pred = new_pred.merge(cogs, how = "left", on = ["Company", "Year"]).merge(revenue, how = "left", on = ["Company", "Year"]).merge(ebitda, how = "left", on = ["Company", "Year"])
new_pred = new_pred[~new_pred['Cogs'].isnull()].reset_index(drop = True)
new_pred = new_pred[~new_pred['Ebitda'].isnull()].reset_index(drop = True)
new_pred = new_pred[~new_pred['Ebitda_1'].isnull()].reset_index(drop = True)
new_pred = new_pred[~new_pred['Ebitda_2'].isnull()].reset_index(drop = True)
new_pred = new_pred[~new_pred.Company.isin(remove_ebitda)]
new_pred = new_pred[~new_pred.Company.isin(remove_cogs)]
new_pred = new_pred[~new_pred.Company.isin(remove_revenue)]

#### Market Data

In [94]:
market_spend = pd.read_csv("../input_data/market_data.csv")

In [95]:
mapping_dict = {('Automotive Electronics Categories', 'ADAS'): ('Automotive', 'Auto ADAS'),
    ('Automotive Electronics Categories', 'Body & Convenience'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Chassis & Safety'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Connectivity & Telematics'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Hybrid & Electric Drive Train'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Infotainment & Cluster'): ('Automotive', 'Auto Infotainment'),
    ('Automotive Electronics Categories', 'Other Automotive'): ('Automotive', 'Other Auto & Aftermarket'),
    ('Automotive Electronics Categories', 'Powertrain & Vehicle Dynamics'): ('Automotive', 'Auto Powertrain'),
    ('Computing & Data Storage Categories', 'Data Center Servers'): ('Computer Platforms', 'Data Center Servers'),
    ('Computing & Data Storage Categories', 'Desktop PCs'): ('Computer Platforms', 'Desktop PCs'),
    ('Computing & Data Storage Categories', 'Flash Storage Cards'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Computing & Data Storage Categories', 'Hard Disk Drives'): ('Computer Peripherals & Storage', 'HDD'),
    ('Computing & Data Storage Categories', 'Notebook PCs'): ('Computer Platforms', 'Notebook PCs'),
    ('Computing & Data Storage Categories', 'Other Computing'): ('Computer Platforms', 'Other Computer Products'),
    ('Computing & Data Storage Categories', 'Other Data Storage'): ('Computer Peripherals & Storage', 'Other Storage'),
    ('Computing & Data Storage Categories', 'Other Peripherals'): ('Computer Peripherals & Storage', 'Other Peripherals'),
    ('Computing & Data Storage Categories', 'Smart Cards'): ('Computer Peripherals & Storage', 'Smart Cards'),
    ('Computing & Data Storage Categories', 'Solid-State Drives'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Computing & Data Storage Categories', 'Tablet PCs'): ('Computer Platforms', 'Tablet PCs'),
    ('Computing & Data Storage Categories', 'USB Flash Drive'): ('Computer Peripherals & Storage', 'Flash Cards/Drives'),
    ('Consumer Electronics Categories', 'Fitness & Wellness Wearable Electronics'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'LCD TV'): ('Consumer', 'TV'),
    ('Consumer Electronics Categories', 'Major Home Appliances'): ('Consumer', 'Appliance'),
    ('Consumer Electronics Categories', 'OLED TV'): ('Consumer', 'TV'),
    ('Consumer Electronics Categories', 'Other Audio/Video'): ('Consumer', 'Audio'),
    ('Consumer Electronics Categories', 'Other Consumer Electronics'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'Set-Top Boxes'): ('Consumer', 'STB'),
    ('Consumer Electronics Categories', 'Smart Speakers & Digital Assistants'): ('Consumer', 'Connected Consumer'),
    ('Consumer Electronics Categories', 'Smart Watches'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'VR Headsets'): ('Consumer', 'Other Consumer'),
    ('Consumer Electronics Categories', 'Video Game Consoles'): ('Consumer', 'Video Games'),
    ('Industrial Electronics Categories', 'Automation'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Building & Home Control'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Lighting'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Medical Electronics'): ('Industrial', 'Medical'),
    ('Industrial Electronics Categories', 'Military & Civil Aerospace'): ('Industrial', 'Military/Aerospace'),
    ('Industrial Electronics Categories', 'Other Industrial'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Power & Energy'): ('Industrial', 'Power & Energy'),
    ('Industrial Electronics Categories', 'Security & Video Surveillance'): ('Industrial', 'Other Industrial'),
    ('Industrial Electronics Categories', 'Test & Measurement'): ('Industrial', 'Test & Measurement'),
    ('Industrial Electronics Categories', 'Other Industrial') : ('Industrial', 'Manufacturing Equipment'),
    ('Wired Communications Categories', 'Broadcast & Streaming Video'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Cable Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Cable CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Carrier Ethernet Switches & Routers'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'DSL Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'DSL CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Data Center Network Switches'): ('Wired Communications', 'DC Network & Threat Mitigation'),
    ('Wired Communications Categories', 'Enterprise Ethernet Switches & Routers'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'Enterprise UC & Voice'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'FTTH Aggregation Equipment'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'FTTH CPE'): ('Wired Communications', 'Carrier'),
    ('Wired Communications Categories', 'Low-Tier Consumer/SOHO Routers'): ('Wired Communications', 'Enterprise/SOHO'),
    ('Wired Communications Categories', 'Optical Equipment'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Other Wired Communications'): ('Wired Communications', 'Other Wired'),
    ('Wired Communications Categories', 'Threat Mitigation Products'): ('Wired Communications', 'DC Network & Threat Mitigation'),
    ('Wireless Communications Categories', 'Gray Market Handsets'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'High-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Low-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'M2M Modules'): ('Wireless Communications', 'Other Wireless'),
    ('Wireless Communications Categories', 'Media Tablets'): ('Wireless Communications', 'Media Tablets'),
    ('Wireless Communications Categories', 'Mid-Tier Smartphone'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Mobile Comm Infrastructure'): ('Wireless Communications', 'Infrastructure'),
    ('Wireless Communications Categories', 'Mobile Phone (ULCH, Entry, Feature)'): ('Wireless Communications', 'Handset'),
    ('Wireless Communications Categories', 'Other Wireless Communications'): ('Wireless Communications', 'Other Wireless'),
    ('Wireless Communications Categories', 'Wireless LAN Equipment'): ('Wireless Communications', 'Infrastructure')}

In [96]:
#Rename the market and submarket
def remapping(market, submarket):
    key = (market, submarket)
    return mapping_dict[key][0], mapping_dict[key][1]

market_spend.Market, market_spend.Submarket = zip(*market_spend.apply(lambda x : remapping(x.Market, x.Submarket), axis = 1))
market_spend.rename(columns = {"Submarket" : "SubMarket"}, inplace = True)
market_spend

Unnamed: 0,Market,SubMarket,Device,Year,Region,Market size
0,Automotive,Auto ADAS,Amplifier/Comparator,2008,Americas,0.424
1,Automotive,Auto ADAS,Amplifier/Comparator,2008,Asia & Oceania (excl. Japan),0.246
2,Automotive,Auto ADAS,Amplifier/Comparator,2008,EMEA,0.678
3,Automotive,Auto ADAS,Amplifier/Comparator,2008,Japan,0.421
4,Automotive,Auto ADAS,Amplifier/Comparator,2008,Worldwide,1.769
...,...,...,...,...,...,...
32495,Wireless Communications,Infrastructure,Voltage Regulator/Reference,2027,Americas,0.000
32496,Wireless Communications,Infrastructure,Voltage Regulator/Reference,2027,Asia & Oceania (excl. Japan),0.000
32497,Wireless Communications,Infrastructure,Voltage Regulator/Reference,2027,EMEA,0.000
32498,Wireless Communications,Infrastructure,Voltage Regulator/Reference,2027,Japan,0.000


In [97]:
market_spend = market_spend[(market_spend.Region == "Worldwide")]
market_spend.drop("Region", axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  market_spend.drop("Region", axis = 1, inplace = True)


In [98]:
marketspendings = market_spend[["Market", "Year", "Market size"]].groupby(["Market", "Year"]).sum().reset_index()
submarketspendings = market_spend[["SubMarket", "Year", "Market size"]].groupby(["SubMarket", "Year"]).sum().reset_index()
devicespendings = market_spend[["Device", "Year", "Market size"]].groupby(["Device", "Year"]).sum().reset_index()

In [99]:
new_pred = new_pred.merge(market_spend, how = 'left', on = ['Market', "SubMarket", "Device", "Year"])
new_pred = new_pred.groupby(["Company", "Device", "Market", "SubMarket", "Year"]).sum().reset_index()
new_pred.rename(columns = {"Market size" : "Spendings"}, inplace = True)
new_pred = new_pred[new_pred['Spendings'] != 0]
new_pred

Unnamed: 0,Company,Device,Market,SubMarket,Year,Cogs,Cogs_1,Cogs_2,Revenue,Revenue_1,Revenue_2,Ebitda,Ebitda_1,Ebitda_2,Spendings
0,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2024,10122.197857,9879.678571,19712.000000,32970.35,32174.52,29446.00,5943.65,5807.680000,4477.000000,141.782
1,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2025,10364.717143,10122.197857,9879.678571,34510.78,32970.35,32174.52,6271.24,5943.650000,5807.680000,136.463
2,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2026,10607.236429,10364.717143,10122.197857,36835.00,34510.78,32970.35,6997.00,6271.240000,5943.650000,143.282
3,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2027,10849.755714,10607.236429,10364.717143,38700.25,36835.00,34510.78,7210.00,6997.000000,6271.240000,152.239
4,ABB,Amplifier/Comparator,Industrial,Power & Energy,2024,10122.197857,9879.678571,19712.000000,32970.35,32174.52,29446.00,5943.65,5807.680000,4477.000000,322.803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12434,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Infrastructure,2027,20778.528571,19276.577857,17774.627143,12728.00,11960.00,10521.76,2986.00,2720.000000,2342.060000,249.001
12435,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2024,16272.676429,14770.725714,6314.000000,9343.76,9073.52,11562.00,1833.52,7337.771429,2280.000000,454.999
12436,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2025,17774.627143,16272.676429,14770.725714,10521.76,9343.76,9073.52,2342.06,1833.520000,7337.771429,498.000
12437,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2026,19276.577857,17774.627143,16272.676429,11960.00,10521.76,9343.76,2720.00,2342.060000,1833.520000,505.001


In [100]:
new_pred = new_pred.merge(marketspendings, how = 'left', on = ["Market", "Year"])
new_pred.rename(columns = {"Market size" : "MarketSpends"}, inplace = True)
new_pred = new_pred.merge(submarketspendings, how = 'left', on = ["SubMarket", "Year"])
new_pred.rename(columns = {"Market size" : "SubMarketSpends"}, inplace = True)
new_pred = new_pred.merge(devicespendings, how = 'left', on = ["Device", "Year"])
new_pred.rename(columns = {"Market size" : "DevSpends"}, inplace = True)
new_pred

Unnamed: 0,Company,Device,Market,SubMarket,Year,Cogs,Cogs_1,Cogs_2,Revenue,Revenue_1,Revenue_2,Ebitda,Ebitda_1,Ebitda_2,Spendings,MarketSpends,SubMarketSpends,DevSpends
0,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2024,10122.197857,9879.678571,19712.000000,32970.35,32174.52,29446.00,5943.65,5807.680000,4477.000000,141.782,17578.984,1268.295,5225.000
1,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2025,10364.717143,10122.197857,9879.678571,34510.78,32970.35,32174.52,6271.24,5943.650000,5807.680000,136.463,18718.436,1286.700,5641.009
2,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2026,10607.236429,10364.717143,10122.197857,36835.00,34510.78,32970.35,6997.00,6271.240000,5943.650000,143.282,20062.488,1354.429,5829.007
3,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2027,10849.755714,10607.236429,10364.717143,38700.25,36835.00,34510.78,7210.00,6997.000000,6271.240000,152.239,21590.113,1439.354,6035.001
4,ABB,Amplifier/Comparator,Industrial,Power & Energy,2024,10122.197857,9879.678571,19712.000000,32970.35,32174.52,29446.00,5943.65,5807.680000,4477.000000,322.803,17578.984,3481.204,5225.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12384,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Infrastructure,2027,20778.528571,19276.577857,17774.627143,12728.00,11960.00,10521.76,2986.00,2720.000000,2342.060000,249.001,28472.544,3126.003,20054.011
12385,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2024,16272.676429,14770.725714,6314.000000,9343.76,9073.52,11562.00,1833.52,7337.771429,2280.000000,454.999,26647.319,4344.999,16829.005
12386,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2025,17774.627143,16272.676429,14770.725714,10521.76,9343.76,9073.52,2342.06,1833.520000,7337.771429,498.000,29447.329,4771.001,18326.012
12387,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2026,19276.577857,17774.627143,16272.676429,11960.00,10521.76,9343.76,2720.00,2342.060000,1833.520000,505.001,28785.672,4675.999,19135.005


Adjust everything to log_diff format

In [101]:
def log_diff(x, y):
    return np.log(1+x) - np.log(1+y)

In [102]:
new_pred["Spendings_1"] = new_pred.groupby(["Company", "Device", "Market", "SubMarket"]).Spendings.shift(1)
new_pred["MarketSpends_1"] = new_pred.groupby(["Company", "Device", "Market", "SubMarket"]).MarketSpends.shift(1)
new_pred["SubMarketSpends_1"] = new_pred.groupby(["Company", "Device", "Market", "SubMarket"]).SubMarketSpends.shift(1)
new_pred["DevSpends_1"] = new_pred.groupby(["Company", "Device", "Market", "SubMarket"]).DevSpends.shift(1)

new_pred["LogDiffSpendings"] = new_pred.apply(lambda x: log_diff(x.Spendings, x.Spendings_1), axis = 1)
new_pred["LogDiffMarketSpends"] = new_pred.apply(lambda x: log_diff(x.MarketSpends, x.MarketSpends_1), axis = 1)
new_pred["LogDiffSubMarketSpends"] = new_pred.apply(lambda x: log_diff(x.SubMarketSpends, x.SubMarketSpends_1), axis = 1)
new_pred["LogDiffDevSpends"] = new_pred.apply(lambda x: log_diff(x.DevSpends, x.DevSpends_1), axis = 1)

new_pred.drop(["Spendings", "Spendings_1", "MarketSpends", "MarketSpends_1", "SubMarketSpends", "SubMarketSpends_1", "DevSpends", "DevSpends_1"], axis = 1, inplace = True)

new_pred["LogDiffCogs"] = new_pred.apply(lambda x: log_diff(x.Cogs, x.Cogs_1), axis = 1)
new_pred["LogDiffRevenue"] = new_pred.apply(lambda x: log_diff(x.Revenue, x.Revenue_1), axis = 1)
new_pred["LogDiffEbitda"] = new_pred.apply(lambda x: log_diff(x.Ebitda, x.Ebitda_1), axis = 1)
new_pred["LogDiffCogs_1"] = new_pred.apply(lambda x: log_diff(x.Cogs_1, x.Cogs_2), axis = 1)
new_pred["LogDiffRevenue_1"] = new_pred.apply(lambda x: log_diff(x.Revenue_1, x.Revenue_2), axis = 1)
new_pred["LogDiffEbitda_1"] = new_pred.apply(lambda x: log_diff(x.Ebitda_1, x.Ebitda_2), axis = 1)

new_pred.drop(["Cogs_1", "Cogs_2", "Cogs", "Revenue", "Revenue_1", "Revenue_2", "Ebitda", "Ebitda_1", "Ebitda_2"], axis = 1, inplace = True)
new_pred

Unnamed: 0,Company,Device,Market,SubMarket,Year,LogDiffSpendings,LogDiffMarketSpends,LogDiffSubMarketSpends,LogDiffDevSpends,LogDiffCogs,LogDiffRevenue,LogDiffEbitda,LogDiffCogs_1,LogDiffRevenue_1,LogDiffEbitda_1
0,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2024,,,,,0.024248,0.024433,0.023138,-0.690697,0.088614,0.260177
1,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2025,-0.037964,0.062801,0.014396,0.076594,0.023674,0.045662,0.053642,0.024248,0.024433,0.023138
2,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2026,0.048415,0.069339,0.051260,0.032778,0.023127,0.065175,0.109491,0.023674,0.045662,0.053642
3,ABB,Amplifier/Comparator,Industrial,Manufacturing Equipment,2027,0.060229,0.073380,0.060771,0.034724,0.022604,0.049396,0.029983,0.023127,0.065175,0.109491
4,ABB,Amplifier/Comparator,Industrial,Power & Energy,2024,,,,,0.024248,0.024433,0.023138,-0.690697,0.088614,0.260177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12384,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Infrastructure,2027,0.016125,-0.010937,-0.007646,0.046907,0.075026,0.062231,0.093270,0.081114,0.128111,0.149542
12385,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2024,,,,,0.096834,0.029345,-1.386389,0.849787,-0.242340,1.168558
12386,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2025,0.090115,0.099911,0.093510,0.085213,0.088279,0.118725,0.244675,0.096834,0.029345,-1.386389
12387,Zebra Technologies,Voltage Regulator/Reference,Wireless Communications,Other Wireless,2026,0.013933,-0.022725,-0.020109,0.043196,0.081114,0.128111,0.149542,0.088279,0.118725,0.244675


In [103]:
new_pred = new_pred[~new_pred['LogDiffSpendings'].isnull()]
new_pred.to_csv("../clean_data/new_pred.csv")