In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
# Read loans data
link = 'https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv'

In [3]:
loan_data = pd.read_csv(link)
loan_data.head()

Unnamed: 0,Amount.Requested,Amount.Funded.By.Investors,Interest.Rate,Loan.Length,Loan.Purpose,Debt.To.Income.Ratio,State,Home.Ownership,Monthly.Income,FICO.Range,Open.CREDIT.Lines,Revolving.CREDIT.Balance,Inquiries.in.the.Last.6.Months,Employment.Length
81174,20000,20000.0,8.90%,36 months,debt_consolidation,14.90%,SC,MORTGAGE,6541.67,735-739,14.0,14272.0,2.0,< 1 year
99592,19200,19200.0,12.12%,36 months,debt_consolidation,28.36%,TX,MORTGAGE,4583.33,715-719,12.0,11140.0,1.0,2 years
80059,35000,35000.0,21.98%,60 months,debt_consolidation,23.81%,CA,MORTGAGE,11500.0,690-694,14.0,21977.0,1.0,2 years
15825,10000,9975.0,9.99%,36 months,debt_consolidation,14.30%,KS,MORTGAGE,3833.33,695-699,10.0,9346.0,0.0,5 years
33182,12000,12000.0,11.71%,36 months,credit_card,18.78%,NJ,RENT,3195.0,695-699,11.0,14469.0,0.0,9 years


In [4]:
y = loan_data['Interest.Rate']
x = loan_data.drop(['Interest.Rate'], axis = 1)
x = x.rename(index=str, columns={"Home.Ownership": "Home_Ownership", "Amount.Requested": "Amount_Requested", "Amount.Funded.By.Investors"
                            : "Amount_Funded_By_Investors", "Loan.Length":"Loan_Length", "Loan.Purpose": "Loan_Purpose",
                            "Debt.To.Income.Ratio": "Debt_To_Income_Ratio", "Home.Ownership":"Home_Ownership", "Monthly.Income"
                            : "Monthly_Income", "FICO.Range":"FICO_Range", "Open.CREDIT.Lines":"Open_CREDIT_Lines",
                            "Revolving.CREDIT.Balance": "Revolving_CREDIT_Balance", "Inquiries.in.the.Last.6.Months":
                            "Inquiries_in_the_Last_6_Months", "Employment.Length": "Employment_Length"})
print('Unique Loan_Purpose values:\n',x.Loan_Purpose.unique())
print('\n\nUnique Loan_Length values:\n', x.Loan_Length.unique())
print('\n\nDebt_To_Income_Ratio is of type ',type(x.Debt_To_Income_Ratio[0]), ' so I will convert that to int.\n\n')
print('\n\nUnique values in State:\n', x.State.unique())
print('\n\nUnique values in Home_Ownership:\n', x.Home_Ownership.unique())
print('\n\nUnique values in FICO_Range:\n', x.FICO_Range.unique())
# Int so do not have to convert. But will drop nans later
print('\n\nUnique values in Open_CREDIT_Lines:\n', x.Open_CREDIT_Lines.unique())
print('\n\nUnique values in Employment_Length:\n', x.Employment_Length.unique())
x.head()

Unique Loan_Purpose values:
 ['debt_consolidation' 'credit_card' 'other' 'moving' 'car' 'vacation'
 'home_improvement' 'house' 'major_purchase' 'educational' 'medical'
 'wedding' 'small_business' 'renewable_energy']


Unique Loan_Length values:
 ['36 months' '60 months']


Debt_To_Income_Ratio is of type  <class 'str'>  so I will convert that to int.




Unique values in State:
 ['SC' 'TX' 'CA' 'KS' 'NJ' 'CT' 'MA' 'LA' 'FL' 'DC' 'OH' 'AL' 'AZ' 'GA'
 'WV' 'NH' 'VA' 'NY' 'MD' 'HI' 'PA' 'WA' 'IL' 'NC' 'WI' 'SD' 'AK' 'DE'
 'MN' 'MO' 'RI' 'CO' 'NM' 'MI' 'OK' 'NV' 'UT' 'AR' 'KY' 'VT' 'OR' 'IA'
 'MT' 'IN' 'WY' 'MS']


Unique values in Home_Ownership:
 ['MORTGAGE' 'RENT' 'OWN' 'OTHER' 'NONE']


Unique values in FICO_Range:
 ['735-739' '715-719' '690-694' '695-699' '670-674' '720-724' '705-709'
 '685-689' '665-669' '725-729' '730-734' '740-744' '760-764' '675-679'
 '765-769' '780-784' '830-834' '660-664' '710-714' '785-789' '750-754'
 '700-704' '680-684' '755-759' '790-794' '810-814' '775-779' 

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Length,Loan_Purpose,Debt_To_Income_Ratio,State,Home_Ownership,Monthly_Income,FICO_Range,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Employment_Length
81174,20000,20000.0,36 months,debt_consolidation,14.90%,SC,MORTGAGE,6541.67,735-739,14.0,14272.0,2.0,< 1 year
99592,19200,19200.0,36 months,debt_consolidation,28.36%,TX,MORTGAGE,4583.33,715-719,12.0,11140.0,1.0,2 years
80059,35000,35000.0,60 months,debt_consolidation,23.81%,CA,MORTGAGE,11500.0,690-694,14.0,21977.0,1.0,2 years
15825,10000,9975.0,36 months,debt_consolidation,14.30%,KS,MORTGAGE,3833.33,695-699,10.0,9346.0,0.0,5 years
33182,12000,12000.0,36 months,credit_card,18.78%,NJ,RENT,3195.0,695-699,11.0,14469.0,0.0,9 years


# Manipulating Data Types

As seen above, there are many columns in our data set that are categorical data/ strings instead of integers. In order to fit my data to a linear regression model, I have to convert all data types to integers that will fit with the model

In [5]:
# The column titled 'Loan_Length' has 2 options: 36 months of 60 months. I will change these columns simply to 36 or 60.
x_loan = x.Loan_Length.replace('36 months', int(36)).replace('60 months', int(60)) 
x = x.drop(['Loan_Length'], axis = 1)#.insert(0, column = 'Loan_Length', value = x_loan)
x['Loan_Length'] = pd.Series(x_loan, index=x.index)
x

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Purpose,Debt_To_Income_Ratio,State,Home_Ownership,Monthly_Income,FICO_Range,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Employment_Length,Loan_Length
81174,20000,20000.00,debt_consolidation,14.90%,SC,MORTGAGE,6541.67,735-739,14.0,14272.0,2.0,< 1 year,36
99592,19200,19200.00,debt_consolidation,28.36%,TX,MORTGAGE,4583.33,715-719,12.0,11140.0,1.0,2 years,36
80059,35000,35000.00,debt_consolidation,23.81%,CA,MORTGAGE,11500.00,690-694,14.0,21977.0,1.0,2 years,60
15825,10000,9975.00,debt_consolidation,14.30%,KS,MORTGAGE,3833.33,695-699,10.0,9346.0,0.0,5 years,36
33182,12000,12000.00,credit_card,18.78%,NJ,RENT,3195.00,695-699,11.0,14469.0,0.0,9 years,36
62403,6000,6000.00,other,20.05%,CT,OWN,4891.67,670-674,17.0,10391.0,2.0,3 years,36
48808,10000,10000.00,debt_consolidation,26.09%,MA,RENT,2916.67,720-724,10.0,15957.0,0.0,10+ years,36
22090,33500,33450.00,credit_card,14.70%,LA,MORTGAGE,13863.42,705-709,12.0,27874.0,0.0,10+ years,60
76404,14675,14675.00,credit_card,26.92%,CA,RENT,3150.00,685-689,9.0,7246.0,1.0,8 years,36
15867,7000,7000.00,credit_card,7.10%,CA,RENT,5000.00,715-719,8.0,7612.0,0.0,3 years,36


Columns that are ints:

1. Open_CREDIT_Lines
2. Amount_Requested
3. Amount_Funded_By_Investors
4. Monthly_Income
5. Open_CREDIT_Lines
6. Revolving_CREDIT_Balance
7. Inquiries_in_the_Last_6_Months
8. Loan_Length
9. Debt_To_Income_Ratio
10. Employment_Length

In [6]:
wow = x.Debt_To_Income_Ratio.iloc[:,].str.strip('%')
wow = wow.astype(float)
wow = wow / 100
x = x.drop(['Debt_To_Income_Ratio'], axis = 1)
x['Debt_To_Income_Ratio'] = pd.Series(wow, index=x.index)



In [7]:
# Now to convert 'Employment_Length' to int values

x_emp = x.Employment_Length.str.strip('year').str.strip('years').str.replace('+', '').str.replace('< 1', '0.5').fillna(0).astype(float)
x = x.drop(['Employment_Length'], axis = 1)
x['Employment_Length'] = pd.Series(x_emp, index = x.index)

In [8]:
x = x.drop(['State'], axis = 1)

I am choosing to drop the state column because state populations are not equaly distributed and can result in unbalanced data

In [9]:
x.head()

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Purpose,Home_Ownership,Monthly_Income,FICO_Range,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Loan_Length,Debt_To_Income_Ratio,Employment_Length
81174,20000,20000.0,debt_consolidation,MORTGAGE,6541.67,735-739,14.0,14272.0,2.0,36,0.149,0.5
99592,19200,19200.0,debt_consolidation,MORTGAGE,4583.33,715-719,12.0,11140.0,1.0,36,0.2836,2.0
80059,35000,35000.0,debt_consolidation,MORTGAGE,11500.0,690-694,14.0,21977.0,1.0,60,0.2381,2.0
15825,10000,9975.0,debt_consolidation,MORTGAGE,3833.33,695-699,10.0,9346.0,0.0,36,0.143,5.0
33182,12000,12000.0,credit_card,RENT,3195.0,695-699,11.0,14469.0,0.0,36,0.1878,9.0


For the FICO_Range category, I will replace the range by the mean of values. Ex: '695-699' will turn to '697'


Unique values in FICO_Range:
 ['735-739' '715-719' '690-694' '695-699' '670-674' '720-724' '705-709'
 '685-689' '665-669' '725-729' '730-734' '740-744' '760-764' '675-679'
 '765-769' '780-784' '830-834' '660-664' '710-714' '785-789' '750-754'
 '700-704' '680-684' '755-759' '790-794' '810-814' '775-779' '815-819'
 '745-749' '805-809' '800-804' '655-659' '770-774' '795-799' '640-644'
 '645-649' '820-824' '650-654']

In [10]:
x_fic = []
for l in x.FICO_Range:
    life = l.split('-')
    value = (int(life[0]) + int(life[1])) / 2
    print(value)
    x_fic.append(value)
    
x['FICO_Range_Mean'] = pd.Series(x_fic, index = x.index)

737.0
717.0
692.0
697.0
697.0
672.0
722.0
707.0
687.0
717.0
672.0
667.0
672.0
737.0
727.0
732.0
697.0
742.0
732.0
762.0
667.0
697.0
667.0
697.0
672.0
707.0
677.0
677.0
767.0
762.0
687.0
687.0
722.0
687.0
677.0
782.0
722.0
832.0
717.0
662.0
672.0
722.0
662.0
662.0
677.0
717.0
712.0
672.0
787.0
707.0
752.0
662.0
702.0
667.0
682.0
727.0
672.0
717.0
692.0
757.0
707.0
717.0
682.0
667.0
732.0
727.0
687.0
687.0
707.0
697.0
697.0
717.0
737.0
667.0
672.0
672.0
792.0
702.0
667.0
727.0
712.0
762.0
682.0
692.0
697.0
727.0
812.0
677.0
752.0
687.0
667.0
767.0
672.0
677.0
677.0
752.0
767.0
737.0
667.0
672.0
702.0
707.0
742.0
692.0
722.0
767.0
717.0
712.0
692.0
712.0
777.0
817.0
747.0
662.0
692.0
712.0
727.0
677.0
767.0
737.0
702.0
737.0
692.0
762.0
682.0
697.0
677.0
702.0
697.0
677.0
732.0
667.0
677.0
682.0
727.0
662.0
807.0
747.0
742.0
702.0
722.0
662.0
737.0
702.0
707.0
727.0
662.0
737.0
727.0
707.0
672.0
722.0
717.0
707.0
712.0
692.0
707.0
687.0
662.0
667.0
747.0
742.0
667.0
697.0
682.0
702.0
717.

672.0
787.0
672.0
717.0
767.0
732.0
692.0
662.0
747.0
767.0
717.0
767.0
702.0
727.0
697.0
787.0
672.0
792.0
727.0
797.0
672.0
732.0
712.0
662.0
697.0
722.0
677.0
707.0
722.0
762.0
687.0
667.0
727.0
662.0
712.0
662.0
752.0
672.0
717.0
722.0
707.0
802.0
667.0
752.0
717.0
707.0
687.0
672.0
667.0
737.0
682.0
682.0
697.0
667.0
672.0
677.0
702.0
682.0
762.0
687.0
687.0
677.0
792.0
677.0
662.0
662.0
742.0
747.0
697.0
667.0
722.0
692.0
677.0
747.0
732.0
682.0
672.0
722.0
682.0
682.0
692.0
697.0
662.0
702.0
692.0
667.0
662.0
732.0
682.0
672.0
707.0
762.0
702.0
677.0
697.0
697.0
677.0
682.0
697.0
667.0
672.0
707.0
727.0
757.0
707.0
697.0
727.0
702.0
727.0
782.0
682.0
682.0
677.0
712.0
687.0
777.0
682.0
707.0
702.0
707.0
702.0
672.0
667.0
737.0
717.0
727.0
697.0
732.0
737.0
682.0
747.0
752.0
687.0
682.0
727.0
672.0
692.0
702.0
667.0
692.0
677.0
707.0
717.0
717.0
737.0
747.0
662.0
667.0
662.0
717.0
687.0
732.0
662.0
772.0
682.0
697.0
737.0
692.0
702.0
717.0
702.0
702.0
687.0
707.0
712.0
767.0
682.

In [11]:
x = x.drop(['FICO_Range'], axis = 1)
x.head()

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Purpose,Home_Ownership,Monthly_Income,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Loan_Length,Debt_To_Income_Ratio,Employment_Length,FICO_Range_Mean
81174,20000,20000.0,debt_consolidation,MORTGAGE,6541.67,14.0,14272.0,2.0,36,0.149,0.5,737.0
99592,19200,19200.0,debt_consolidation,MORTGAGE,4583.33,12.0,11140.0,1.0,36,0.2836,2.0,717.0
80059,35000,35000.0,debt_consolidation,MORTGAGE,11500.0,14.0,21977.0,1.0,60,0.2381,2.0,692.0
15825,10000,9975.0,debt_consolidation,MORTGAGE,3833.33,10.0,9346.0,0.0,36,0.143,5.0,697.0
33182,12000,12000.0,credit_card,RENT,3195.0,11.0,14469.0,0.0,36,0.1878,9.0,697.0


Unique values in Home_Ownership:
 ['MORTGAGE' 'RENT' 'OWN' 'OTHER' 'NONE']

In [12]:
x_dummy = pd.get_dummies(x['Home_Ownership'])
print(x_dummy)

        MORTGAGE  NONE  OTHER  OWN  RENT
81174          1     0      0    0     0
99592          1     0      0    0     0
80059          1     0      0    0     0
15825          1     0      0    0     0
33182          0     0      0    0     1
62403          0     0      0    1     0
48808          0     0      0    0     1
22090          1     0      0    0     0
76404          0     0      0    0     1
15867          0     0      0    0     1
94971          0     0      0    0     1
36911          1     0      0    0     0
41200          1     0      0    0     0
83869          0     0      0    0     1
53853          0     0      0    0     1
21399          0     0      0    0     1
62127          0     0      0    0     1
23446          0     0      0    0     1
44987          0     0      0    0     1
17977          1     0      0    0     0
86099          1     0      0    0     0
99483          1     0      0    0     0
28798          0     0      0    0     1
24168          0

In [13]:
x = x.drop(['Home_Ownership'], axis = 1)


In [14]:
x = pd.concat([x, x_dummy], axis = 1)
x.head()

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Purpose,Monthly_Income,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Loan_Length,Debt_To_Income_Ratio,Employment_Length,FICO_Range_Mean,MORTGAGE,NONE,OTHER,OWN,RENT
81174,20000,20000.0,debt_consolidation,6541.67,14.0,14272.0,2.0,36,0.149,0.5,737.0,1,0,0,0,0
99592,19200,19200.0,debt_consolidation,4583.33,12.0,11140.0,1.0,36,0.2836,2.0,717.0,1,0,0,0,0
80059,35000,35000.0,debt_consolidation,11500.0,14.0,21977.0,1.0,60,0.2381,2.0,692.0,1,0,0,0,0
15825,10000,9975.0,debt_consolidation,3833.33,10.0,9346.0,0.0,36,0.143,5.0,697.0,1,0,0,0,0
33182,12000,12000.0,credit_card,3195.0,11.0,14469.0,0.0,36,0.1878,9.0,697.0,0,0,0,0,1


In [15]:
x.head()

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Loan_Purpose,Monthly_Income,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Loan_Length,Debt_To_Income_Ratio,Employment_Length,FICO_Range_Mean,MORTGAGE,NONE,OTHER,OWN,RENT
81174,20000,20000.0,debt_consolidation,6541.67,14.0,14272.0,2.0,36,0.149,0.5,737.0,1,0,0,0,0
99592,19200,19200.0,debt_consolidation,4583.33,12.0,11140.0,1.0,36,0.2836,2.0,717.0,1,0,0,0,0
80059,35000,35000.0,debt_consolidation,11500.0,14.0,21977.0,1.0,60,0.2381,2.0,692.0,1,0,0,0,0
15825,10000,9975.0,debt_consolidation,3833.33,10.0,9346.0,0.0,36,0.143,5.0,697.0,1,0,0,0,0
33182,12000,12000.0,credit_card,3195.0,11.0,14469.0,0.0,36,0.1878,9.0,697.0,0,0,0,0,1


In [16]:
x_dummy_purpose = pd.get_dummies(x['Loan_Purpose'])
print(x_dummy_purpose)

        car  credit_card  debt_consolidation  educational  home_improvement  \
81174     0            0                   1            0                 0   
99592     0            0                   1            0                 0   
80059     0            0                   1            0                 0   
15825     0            0                   1            0                 0   
33182     0            1                   0            0                 0   
62403     0            0                   0            0                 0   
48808     0            0                   1            0                 0   
22090     0            1                   0            0                 0   
76404     0            1                   0            0                 0   
15867     0            1                   0            0                 0   
94971     0            0                   0            0                 0   
36911     0            0                   1        

In [17]:
x = pd.concat([x, x_dummy_purpose], axis = 1).drop(['Loan_Purpose'], axis = 1)
x.head()

Unnamed: 0,Amount_Requested,Amount_Funded_By_Investors,Monthly_Income,Open_CREDIT_Lines,Revolving_CREDIT_Balance,Inquiries_in_the_Last_6_Months,Loan_Length,Debt_To_Income_Ratio,Employment_Length,FICO_Range_Mean,...,home_improvement,house,major_purchase,medical,moving,other,renewable_energy,small_business,vacation,wedding
81174,20000,20000.0,6541.67,14.0,14272.0,2.0,36,0.149,0.5,737.0,...,0,0,0,0,0,0,0,0,0,0
99592,19200,19200.0,4583.33,12.0,11140.0,1.0,36,0.2836,2.0,717.0,...,0,0,0,0,0,0,0,0,0,0
80059,35000,35000.0,11500.0,14.0,21977.0,1.0,60,0.2381,2.0,692.0,...,0,0,0,0,0,0,0,0,0,0
15825,10000,9975.0,3833.33,10.0,9346.0,0.0,36,0.143,5.0,697.0,...,0,0,0,0,0,0,0,0,0,0
33182,12000,12000.0,3195.0,11.0,14469.0,0.0,36,0.1878,9.0,697.0,...,0,0,0,0,0,0,0,0,0,0


Since we cannot fit NAN's to our linear regression model, I will get rid of all the nan values in my data.

In [18]:
x.isna().sum()

Amount_Requested                  0
Amount_Funded_By_Investors        0
Monthly_Income                    1
Open_CREDIT_Lines                 2
Revolving_CREDIT_Balance          2
Inquiries_in_the_Last_6_Months    2
Loan_Length                       0
Debt_To_Income_Ratio              0
Employment_Length                 0
FICO_Range_Mean                   0
MORTGAGE                          0
NONE                              0
OTHER                             0
OWN                               0
RENT                              0
car                               0
credit_card                       0
debt_consolidation                0
educational                       0
home_improvement                  0
house                             0
major_purchase                    0
medical                           0
moving                            0
other                             0
renewable_energy                  0
small_business                    0
vacation                    

In [19]:
x = x.fillna(0)

In [20]:
# Removed the NANs
x.isna().sum()

Amount_Requested                  0
Amount_Funded_By_Investors        0
Monthly_Income                    0
Open_CREDIT_Lines                 0
Revolving_CREDIT_Balance          0
Inquiries_in_the_Last_6_Months    0
Loan_Length                       0
Debt_To_Income_Ratio              0
Employment_Length                 0
FICO_Range_Mean                   0
MORTGAGE                          0
NONE                              0
OTHER                             0
OWN                               0
RENT                              0
car                               0
credit_card                       0
debt_consolidation                0
educational                       0
home_improvement                  0
house                             0
major_purchase                    0
medical                           0
moving                            0
other                             0
renewable_energy                  0
small_business                    0
vacation                    

# Fitting the Model

In [21]:
y = y.iloc[:,].str.strip('%').astype(float)

In [22]:
y

81174      8.90
99592     12.12
80059     21.98
15825      9.99
33182     11.71
62403     15.31
48808      7.90
22090     17.14
76404     14.33
15867      6.91
94971     19.72
36911     14.27
41200     21.67
83869      8.90
53853      7.62
21399     15.65
62127     12.12
23446     10.37
44987      9.76
17977      9.99
86099     21.98
99483     19.05
28798     17.99
24168     11.99
10356     16.82
46027      7.90
2238      14.42
65278     15.31
4227       8.59
50182      7.90
          ...  
84265     22.95
80231      7.90
49533     22.45
102514    15.13
78618     18.75
86953     14.09
80129     14.09
85216      8.90
38247     11.71
91245     15.80
53041      6.03
63051      6.62
14446      7.51
68628     14.33
98758     10.16
13070     10.75
45836     17.27
52330     19.99
48243     15.81
63256     18.75
42124     11.71
78043      7.62
925       10.08
74047     23.28
49957     14.65
23735     16.77
65882     14.09
55610     13.99
38576     12.42
3116      13.79
Name: Interest.Rate, Len

In [23]:
y = y.rename(columns={'Interest.Rate':'Interest_Rate'}, inplace=True)

In [24]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

In [25]:
predict_interest = LinearRegression().fit(X_train, y_train)


In [26]:
predict_interest.score(X_train, y_train)

0.7677736625899427

In [27]:
y_pred = predict_interest.predict(X_test)

In [28]:
r2_predict = r2_score(y_test, y_pred)
print('R2 Score:\t', r2_predict)

R2 Score:	 0.7584096395362063


In [29]:
mse_score = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:\t', mse_score)

Mean Squared Error:	 4.144100873270067


In [35]:
print('Coeffecients:\t', predict_interest.coef_[0:,])

Coeffecients:	 [ 3.99980273e-05  1.29929381e-04 -4.58673691e-05 -2.96203423e-02
 -5.72817259e-07  3.25369661e-01  1.30138622e-01 -9.27325522e-02
  4.91468489e-04 -8.80288594e-02 -9.01487367e-01  2.86179030e+00
 -5.63004304e-01 -6.22612593e-01 -7.74686034e-01 -1.79232014e-01
 -5.07051811e-01 -3.85366772e-01  4.33787358e-01 -4.10392096e-01
  1.01744450e+00  1.24599715e-01 -1.83636794e-01  4.02948858e-01
  3.60852953e-01  1.73694151e-02 -6.79584231e-03 -1.92497291e-01
 -4.92030178e-01]


In [46]:
print('There are 29 coefficients for our', x.shape[1], 'columns')

There are 29 coefficients for our 29 columns
