# BCGX GenAI virtual Job Simulation

### Compile the extracted data from the SEC\'s EDGAR database into an Excel spreadsheet

In [1]:
file_path = '10-k_data.xlsx'

### Load the data from the Excel spreadsheet

In [2]:
import pandas as pd

In [3]:
df = pd.read_excel(file_path)

In [4]:
df

Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Operating Activities
0,Microsoft,2023,211915,72361,411976,205753,87582
1,Microsoft,2022,198270,72738,364840,198298,89035
2,Microsoft,2021,168088,61271,333779,191791,76740
3,Tesla,2023,96773,14974,106618,43009,13256
4,Tesla,2022,81462,12556,82338,36440,14724
5,Tesla,2021,53821,5519,62131,30548,11497
6,Apple,2023,383285,96995,352583,290437,110543
7,Apple,2022,394328,99803,352755,302083,122151
8,Apple,2021,365817,94680,351002,287912,104038


### Calculate year-over-year growth rates for each financial metric

In [5]:
df = df.sort_values(by=['Company', 'Year'])

In [6]:
df

Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Operating Activities
8,Apple,2021,365817,94680,351002,287912,104038
7,Apple,2022,394328,99803,352755,302083,122151
6,Apple,2023,383285,96995,352583,290437,110543
2,Microsoft,2021,168088,61271,333779,191791,76740
1,Microsoft,2022,198270,72738,364840,198298,89035
0,Microsoft,2023,211915,72361,411976,205753,87582
5,Tesla,2021,53821,5519,62131,30548,11497
4,Tesla,2022,81462,12556,82338,36440,14724
3,Tesla,2023,96773,14974,106618,43009,13256


In [7]:
df['Revenue Growth (%)'] = df.groupby('Company')['Total Revenue'].pct_change() * 100
df['Net Income Growth (%)'] = df.groupby('Company')['Net Income'].pct_change() * 100
df['Total Assets Growth (%)'] = df.groupby('Company')['Total Assets'].pct_change() * 100
df['Total Liabilities Growth (%)'] = df.groupby('Company')['Total Liabilities'].pct_change() * 100
df['Cash Flow from Operating Activities Growth (%)'] = df.groupby('Company')['Cash Flow from Operating Activities'].pct_change() * 100

In [8]:
df

Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Operating Activities,Revenue Growth (%),Net Income Growth (%),Total Assets Growth (%),Total Liabilities Growth (%),Cash Flow from Operating Activities Growth (%)
8,Apple,2021,365817,94680,351002,287912,104038,,,,,
7,Apple,2022,394328,99803,352755,302083,122151,7.793788,5.410858,0.499427,4.92199,17.409985
6,Apple,2023,383285,96995,352583,290437,110543,-2.800461,-2.813543,-0.048759,-3.855232,-9.502992
2,Microsoft,2021,168088,61271,333779,191791,76740,,,,,
1,Microsoft,2022,198270,72738,364840,198298,89035,17.956071,18.715216,9.305858,3.392756,16.021631
0,Microsoft,2023,211915,72361,411976,205753,87582,6.88203,-0.518299,12.919636,3.759493,-1.631942
5,Tesla,2021,53821,5519,62131,30548,11497,,,,,
4,Tesla,2022,81462,12556,82338,36440,14724,51.357277,127.504983,32.523217,19.287678,28.068192
3,Tesla,2023,96773,14974,106618,43009,13256,18.795267,19.257725,29.488207,18.026894,-9.970117


In [9]:
(394328-365817)/365817*100

7.7937876041846055

### Fill NA values that from pct_change calculations with 0 or an appropriate value

In [10]:
df.fillna(0, inplace=True)

#### Display the dataframe to verify the calculations

In [11]:
df

Unnamed: 0,Company,Year,Total Revenue,Net Income,Total Assets,Total Liabilities,Cash Flow from Operating Activities,Revenue Growth (%),Net Income Growth (%),Total Assets Growth (%),Total Liabilities Growth (%),Cash Flow from Operating Activities Growth (%)
8,Apple,2021,365817,94680,351002,287912,104038,0.0,0.0,0.0,0.0,0.0
7,Apple,2022,394328,99803,352755,302083,122151,7.793788,5.410858,0.499427,4.92199,17.409985
6,Apple,2023,383285,96995,352583,290437,110543,-2.800461,-2.813543,-0.048759,-3.855232,-9.502992
2,Microsoft,2021,168088,61271,333779,191791,76740,0.0,0.0,0.0,0.0,0.0
1,Microsoft,2022,198270,72738,364840,198298,89035,17.956071,18.715216,9.305858,3.392756,16.021631
0,Microsoft,2023,211915,72361,411976,205753,87582,6.88203,-0.518299,12.919636,3.759493,-1.631942
5,Tesla,2021,53821,5519,62131,30548,11497,0.0,0.0,0.0,0.0,0.0
4,Tesla,2022,81462,12556,82338,36440,14724,51.357277,127.504983,32.523217,19.287678,28.068192
3,Tesla,2023,96773,14974,106618,43009,13256,18.795267,19.257725,29.488207,18.026894,-9.970117


In [12]:
summary = df.groupby('Company').agg({
    'Revenue Growth (%)': 'mean',
    'Net Income Growth (%)': 'mean',
    'Total Assets Growth (%)': 'mean',
    'Total Liabilities Growth (%)': 'mean',
    'Cash Flow from Operating Activities Growth (%)': 'mean'
})
#.reset_index()

In [13]:
print("Year-over-Year Average Growth Rates (%):")
summary

Year-over-Year Average Growth Rates (%):


Unnamed: 0_level_0,Revenue Growth (%),Net Income Growth (%),Total Assets Growth (%),Total Liabilities Growth (%),Cash Flow from Operating Activities Growth (%)
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Apple,1.664442,0.865772,0.150223,0.355586,2.635664
Microsoft,8.279367,6.065639,7.408498,2.384083,4.796563
Tesla,23.384181,48.920903,20.670475,12.438191,6.032692


In [14]:
df.to_csv('cleaned_financial_data.csv')
summary.to_csv('summary.csv')