# Support Vector Regression (SVR) Model 

Trained and tested a Support Vector Regression model to predict GDP from a set of 30+ features available in the GDP DataFrame

In [1]:
# Run the base Jupyter Notebook

%run BEA_Base_For_ML_Model.ipynb

BEA_Base_For_ML_Model completed - gdp_df ready for use


In [2]:
# Import libraries

from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
# View the dataframe

gdp_df.head()

Key Metrics,Year-Qtr,Gross domestic product,Personal consumption expenditures,Goods,Durable goods,Motor vehicles and parts,Furnishings and durable household equipment,Recreational goods and vehicles,Other durable goods,Nondurable goods,...,Fed- National Defense - Consumption expenditures,Fed- National Defense - Gross Investment,Nondefense,Fed - Nondefense - Consumption expenditures,Fed - Nondefense - Gross Investment,State and local,State and local - Consumption expenditures,State and local - Gross Investment,GDP_Growth,Is_Recession
0,1959Q1,510.33,309.449,169.723,43.649,18.09,15.143,6.29,4.125,126.074,...,42.349,18.787,11.23,8.456,2.774,44.829,30.172,14.656,0.0,True
1,1959Q2,522.653,315.505,172.617,45.465,19.335,15.548,6.374,4.208,127.152,...,41.394,18.895,13.29,10.441,2.849,44.955,30.541,14.414,0.024147,False
2,1959Q3,525.034,320.725,174.524,46.336,20.073,15.647,6.392,4.224,128.188,...,42.051,19.347,13.134,10.339,2.795,44.96,30.964,13.996,0.004556,False
3,1959Q4,528.6,322.842,173.564,44.103,17.658,15.733,6.42,4.293,129.461,...,42.233,18.722,12.88,9.942,2.938,44.772,31.494,13.278,0.006792,False
4,1960Q1,542.648,326.364,175.06,45.455,19.343,15.522,6.359,4.232,129.605,...,41.592,18.432,10.957,7.822,3.135,45.97,32.436,13.534,0.026576,False


In [4]:
# Define Features to predict on

X = gdp_df[['Housing and utilities',
            'Health care',
            'Transportation services',
            'Recreation services',
            'Food services and accommodations',
            'Financial services and insurance',
            'Other services',
            'Final Cons Expenditure nonprofit',
            'Motor vehicles and parts',
            'Furnishings and durable household equipment',
            'Recreational goods and vehicles',
            'Other durable goods',
            'Food and beverages (off-premises cons)',
            'Clothing and footwear', 
            'Gasoline and other energy goods',
            'Other nondurable goods',
            'Change in private inventories',
            'Structures',
            'Equipment',
            'Intellectual property products',
            'Information processing equipment',
            'Industrial equipment',
            'Transportation equipment',
            'Other equipment',
            'Computers and peripheral equipment',
            'Other',
            'Software',
            'Research and development',
            'Entertainment, literary, and artistic originals',
            'Federal',
            'State and local'
           ]]

In [5]:
# Define the target variable

y = gdp_df["Gross domestic product"]

In [6]:
# Split the test and train datasets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:

# Create the SVR model

model = make_pipeline(StandardScaler(), SVR(kernel = "linear", C=1.0, epsilon=0.5))

In [8]:
# Fit the model

model.fit(X_train, y_train)


In [9]:
# Predict values using the model

model.predict(X_test)

array([ 2877.37541447, 10136.73004535, 13864.23841225, 10679.20921071,
       12495.59472154, 19454.11606269,  9393.82583595,  7072.14766544,
        2685.50204788,  9601.73043917,  2799.38746596, 12808.87254076,
        3886.95827041, 11377.98273631, 11468.08815654, 11803.57714832,
       15949.82312755,  2683.00715168, 13390.34046308,  5189.69577268,
        5566.36113778,  5880.7674552 ,  2746.66291162, 10848.53995117,
        9916.96332058,  3027.36936002,  3774.87686015,  3019.93602507,
        8186.30643873,  2719.66431246, 15267.16296405, 14060.71850358,
        7295.83443331,  2887.60015381, 11945.66074425,  5073.72220058,
       12704.91429661,  2693.67553729,  5509.44751387,  4569.11581144,
        4532.14013307,  7781.35186897,  2805.4992901 ,  8498.41475035,
        2741.2386054 , 15367.20682557,  6815.3147526 ,  3488.20840681,
        4667.15316442,  9632.4625824 ,  4540.17765242, 11596.09578827,
        3292.48469119, 15914.36023896,  2726.57974234,  3179.82077679,
      

In [10]:
# Score the model

model.score(X_test, y_test)

0.8559185632856621