In [1]:
#  Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import pprint

#  Importing API key
from api_keys import bea_api_key

base_url = "https://apps.bea.gov/api/data/"

In [2]:
# BEA CLient for Industry Analysis

class BEAClientIndustry:

  def __init__(self, api_key):
    self.api_key = api_key
    self.base_url = base_url
    self.common_params = {
        'UserID': self.api_key,
        'method': 'GetData',
        'ResultFormat': 'JSON'
    }
    
  def make_request(self, params):
    try:
      full_params = {**self.common_params, **params}
      response = requests.get(self.base_url, params=full_params)
      response.raise_for_status()
      return response.json()
    except requests.exceptions.RequestException as e:
      print(f"API request failed: {e}")
      return None

  def get_gdp_industry_data(self, years=['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']):

    params = {
        'datasetname': 'GDPbyIndustry',
        'Year': ','.join(years),
        'Industry': 'ALL',
        'TableID': '5',  # Value Added by Industry
        'Frequency': 'Q'  
        }
    return self.make_request(params)
  
# Initializing Client
industry_analyzer = BEAClientIndustry(bea_api_key)
print("BEA Client initialized")

BEA Client initialized


In [3]:
def process_gdpIndustry_data(raw_data):
    if not raw_data or 'BEAAPI' not in raw_data:
        print('No Data Received. Invalid Input')
        return None 

    try:
        
        results_list = raw_data['BEAAPI']['Results']
        
        if not results_list or not isinstance(results_list, list):
            print("Results is not a list or is empty")
            return None
            
        results = results_list[0]['Data'] 
        
        processed_data = []
        for item in results:
            try:
                value = float(item['DataValue'].replace(',', ''))
            except (ValueError, AttributeError):
                continue
                
            processed_data.append({
                'Industry % of GDP': value,
                'Industry Description': item['IndustrYDescription'], 
                'Industry': item['Industry'],
                'NoteRef': item['NoteRef'],
                'Quarter': item['Quarter'],
                'TableID': item['TableID'],
                'Year': item['Year']
            })

        gdp_by_industry_df = pd.DataFrame(processed_data)
        return gdp_by_industry_df

    except Exception as e:
        print(f"Error processing data: {e}")
        return None

In [4]:
gdp_response = industry_analyzer.get_gdp_industry_data()
industry_percent_df = process_gdpIndustry_data(gdp_response)
industry_percent_df.reset_index(drop=True, inplace=True)
industry_percent_df

Unnamed: 0,Industry % of GDP,Industry Description,Industry,NoteRef,Quarter,TableID,Year
0,0.9,"Agriculture, forestry, fishing, and hunting",11,5,I,5,2018
1,0.7,Farms,111CA,5,I,5,2018
2,0.2,"Forestry, fishing, and related activities",113FF,5,I,5,2018
3,1.5,Mining,21,5,I,5,2018
4,0.9,Oil and gas extraction,211,5,I,5,2018
...,...,...,...,...,...,...,...
2965,2.8,Other real estate,ORE,5,II,5,2025
2966,15.9,Private goods-producing industries<sup>1</sup>,PGOOD,5;5.1.Q,II,5,2025
2967,13.1,Professional and business services,PROF,5,II,5,2025
2968,72.8,Private services-producing industries<sup>2</sup>,PSERV,5;5.2.Q,II,5,2025


In [5]:
# returning a list of all the industries in the df
industry_list = industry_percent_df['Industry Description'].unique()
print(industry_list)

['Agriculture, forestry, fishing, and hunting' 'Farms'
 'Forestry, fishing, and related activities' 'Mining'
 'Oil and gas extraction' 'Mining, except oil and gas'
 'Support activities for mining' 'Utilities' 'Construction'
 'Food and beverage and tobacco products'
 'Textile mills and textile product mills'
 'Apparel and leather and allied products' 'Manufacturing'
 'Nondurable goods' 'Wood products' 'Paper products'
 'Printing and related support activities' 'Petroleum and coal products'
 'Chemical products' 'Plastics and rubber products'
 'Nonmetallic mineral products' 'Primary metals'
 'Fabricated metal products' 'Machinery'
 'Computer and electronic products'
 'Electrical equipment, appliances, and components'
 'Motor vehicles, bodies and trailers, and parts'
 'Other transportation equipment' 'Furniture and related products'
 'Miscellaneous manufacturing' 'Durable goods' 'Wholesale trade'
 'Motor vehicle and parts dealers' 'Food and beverage stores'
 'Retail trade' 'General merchan

In [6]:
print(type(industry_list))

<class 'numpy.ndarray'>


In [7]:
total_elements = industry_list.size
print(total_elements)

97


In [8]:
industry_count = industry_percent_df['Industry Description'].nunique()
print(industry_count)

97


In [9]:
# writing to an excel file for future reference
# gdp_df.to_excel('Industry_Percent_of_GDP.xlsx', header=True, index=False)

In [10]:
# limiting the data to Q1 2025 to find main sectors
data_2025 = industry_percent_df[(industry_percent_df['Year'] == '2025')].sort_values(by='Industry % of GDP', ascending=False)
data_2025.reset_index(drop=True, inplace=True)
data_2025

Unnamed: 0,Industry % of GDP,Industry Description,Industry,NoteRef,Quarter,TableID,Year
0,100.0,Gross domestic product,GDP,5,II,5,2025
1,100.0,Gross domestic product,GDP,5,I,5,2025
2,88.7,Private industries,PVT,5,II,5,2025
3,88.7,Private industries,PVT,5,I,5,2025
4,72.8,Private services-producing industries<sup>2</sup>,PSERV,5;5.2.Q,II,5,2025
...,...,...,...,...,...,...,...
193,0.1,Printing and related support activities,323,5,II,5,2025
194,0.1,Water transportation,483,5,I,5,2025
195,0.1,"Funds, trusts, and other financial vehicles",525,5,I,5,2025
196,0.0,Apparel and leather and allied products,315AL,5,I,5,2025


In [11]:
# summing the entire column to get a baseline of combinations
total_percent = data_2025['Industry % of GDP'].sum()
total_percent

1089.7

In [12]:
large_sectors = data_2025[(data_2025['Industry % of GDP'] >= 50)].count()['Industry Description']
large_sectors

6

In [13]:
def clean_data(df):
  # major sectors listed BEA industry reports
  major_sectors = ['Finance and insurance',
                   'Agriculture, forestry, fishing, and hunting',
                   'Wholesale trade',
                   'Retail trade',
                   'Mining',
                   'Other services, except government',
                   'Management of companies and enterprises',
                   'Utilities',
                   'Arts, entertainment, and recreation',
                   'Accommodation and food services',
                   'Transportation and warehousing',
                   'Educational services',
                   'Construction',
                   'Nondurable goods manufacturing',
                   'Administrative and waste management',
                   'Federal government',
                   'Durable goods manufacturing',
                   'Professional, scienctific. and technical services',
                   'Health care and social assistance',
                   'State and local government',
                   'Information',
                   'Real estate and rental leasing']

  df_filtered = df[df['Industry Description'].isin(major_sectors)].copy()
  return df_filtered

major_sectors_df = clean_data(industry_percent_df)
major_sectors_df

Unnamed: 0,Industry % of GDP,Industry Description,Industry,NoteRef,Quarter,TableID,Year
0,0.9,"Agriculture, forestry, fishing, and hunting",11,5,I,5,2018
3,1.5,Mining,21,5,I,5,2018
7,1.6,Utilities,22,5,I,5,2018
8,4.3,Construction,23,5,I,5,2018
31,5.9,Wholesale trade,42,5,I,5,2018
...,...,...,...,...,...,...,...
2939,1.1,Educational services,61,5,II,5,2025
2940,7.7,Health care and social assistance,62,5,II,5,2025
2946,1.2,"Arts, entertainment, and recreation",71,5,II,5,2025
2949,3.2,Accommodation and food services,72,5,II,5,2025


In [14]:
def analyze_industry_trends(df):
  analysis = {}

  pivot_df = df.pivot_table(index='Year', columns='Industry Description', 
                      values='Industry % of GDP')
  growth_rates = pivot_df.pct_change()
  analysis['growth_rates'] = growth_rates

  avg_growth = growth_rates.mean().sort_values(ascending=False)
  analysis['average_growth_rate_by_industry'] = avg_growth

  return analysis

In [15]:
print(analyze_industry_trends(major_sectors_df))

{'growth_rates': Industry Description  Accommodation and food services  \
Year                                                    
2018                                              NaN   
2019                                         0.024000   
2020                                        -0.234375   
2021                                         0.183673   
2022                                         0.068966   
2023                                         0.056452   
2024                                         0.007634   
2025                                        -0.030303   

Industry Description  Agriculture, forestry, fishing, and hunting  \
Year                                                                
2018                                                          NaN   
2019                                                    -0.058824   
2020                                                    -0.062500   
2021                                                     0.300000  