In [1]:
import pandas as pd

In [2]:
url = "https://www.treasurydirect.gov/TA_WS/securities/search?format=json&startDate=2010-01-01&endDate=2023-07-10&dateFieldName=issueDate"

In [3]:
df = pd.read_json(url)

In [4]:
 df["pricePer100"].dtype.kind

'f'

In [5]:
df = df.query("pricePer100 != '' and securityType == 'Bill'")

Filter all pricePer100 that are empty string

In [6]:
df['pricePer100'].dtype.kind

'f'

In [7]:
df['pricePer100'] = df['pricePer100'].astype(float)

In [8]:
df['pricePer100']

0       99.396833
1       98.677972
2       97.340778
3       99.618625
4       99.207083
          ...    
4335    99.989889
4336    99.934278
4337    99.998056
4338    99.979778
4339    99.909000
Name: pricePer100, Length: 3052, dtype: float64

In [9]:
df['securityType'].dtype.kind

'O'

In [10]:
df.columns

Index(['cusip', 'issueDate', 'securityType', 'securityTerm', 'maturityDate',
       'interestRate', 'refCpiOnIssueDate', 'refCpiOnDatedDate',
       'announcementDate', 'auctionDate',
       ...
       'treasuryRetailTendersAccepted', 'type',
       'unadjustedAccruedInterestPer1000', 'unadjustedPrice',
       'updatedTimestamp', 'xmlFilenameAnnouncement',
       'xmlFilenameCompetitiveResults', 'xmlFilenameSpecialAnnouncement',
       'tintCusip1', 'tintCusip2'],
      dtype='object', length=118)

In [11]:
useful1 = []

In [12]:
for column in list(df.columns):
    if "date" in column.lower() :
        #print(column,"Is a numeric type")
        useful1.append(column)

In [13]:
useful1

['issueDate',
 'maturityDate',
 'refCpiOnIssueDate',
 'refCpiOnDatedDate',
 'announcementDate',
 'auctionDate',
 'auctionDateYear',
 'datedDate',
 'backDated',
 'backDatedDate',
 'callDate',
 'calledDate',
 'firstInterestPaymentDate',
 'frnIndexDeterminationDate',
 'indexRatioOnIssueDate',
 'maturingDate',
 'originalDatedDate',
 'originalIssueDate',
 'updatedTimestamp']

In [14]:
useful1.remove('indexRatioOnIssueDate')

In [15]:
useful1

['issueDate',
 'maturityDate',
 'refCpiOnIssueDate',
 'refCpiOnDatedDate',
 'announcementDate',
 'auctionDate',
 'auctionDateYear',
 'datedDate',
 'backDated',
 'backDatedDate',
 'callDate',
 'calledDate',
 'firstInterestPaymentDate',
 'frnIndexDeterminationDate',
 'maturingDate',
 'originalDatedDate',
 'originalIssueDate',
 'updatedTimestamp']

In [16]:
df =  df.loc[:, ~df.columns.isin(useful1)]

In [17]:
df

Unnamed: 0,cusip,securityType,securityTerm,interestRate,accruedInterestPer1000,accruedInterestPer100,adjustedAccruedInterestPer1000,adjustedPrice,allocationPercentage,allocationPercentageDecimals,...,treasuryRetailAccepted,treasuryRetailTendersAccepted,type,unadjustedAccruedInterestPer1000,unadjustedPrice,xmlFilenameAnnouncement,xmlFilenameCompetitiveResults,xmlFilenameSpecialAnnouncement,tintCusip1,tintCusip2
0,912796Z36,Bill,42-Day,,,,,,58.77,2,...,0,No,CMB,,,A_20230629_1.xml,R_20230703_1.xml,,,
1,912796YJ2,Bill,13-Week,,,,,,2.88,2,...,1138450400,Yes,Bill,,,A_20230629_3.xml,R_20230703_2.xml,,,
2,912797FW2,Bill,26-Week,,,,,,72.92,2,...,951483100,Yes,Bill,,,A_20230629_2.xml,R_20230703_3.xml,,,
3,912797GA9,Bill,4-Week,,,,,,41.37,2,...,2624141300,Yes,Bill,,,A_20230627_3.xml,R_20230629_2.xml,,,
4,912797GJ0,Bill,8-Week,,,,,,64.18,2,...,908067100,Yes,Bill,,,A_20230627_1.xml,R_20230629_1.xml,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,912795UM1,Bill,13-Week,,,,,,17.72,2,...,942600600,Yes,Bill,,,A_20100107_2.xml,R_20100111_1.xml,,,
4336,912795UY5,Bill,26-Week,,,,,,55.07,2,...,686851500,Yes,Bill,,,A_20100107_5.xml,R_20100111_2.xml,,,
4337,912795S85,Bill,4-Week,,,,,,85.42,2,...,95969300,Yes,Bill,,,A_20100104_1.xml,R_20100105_1.xml,,,
4338,912795U33,Bill,13-Week,,,,,,32.27,2,...,883422700,Yes,Bill,,,A_20091231_2.xml,R_20100104_2.xml,,,


One hot encoding on security Term

In [18]:
df['securityType']

0       Bill
1       Bill
2       Bill
3       Bill
4       Bill
        ... 
4335    Bill
4336    Bill
4337    Bill
4338    Bill
4339    Bill
Name: securityType, Length: 3052, dtype: object

In [19]:
from sklearn.preprocessing import OneHotEncoder

In [20]:
terms_categories = df['securityTerm'].unique()

In [21]:
#terms_categories

In [22]:
#enc = OneHotEncoder(categories=[terms_categories])

In [23]:
useful = []

In [24]:
for column in list(df.columns):
    if df[column].dtype.kind in 'biufc':
        #print(column,"Is a numeric type")
        useful.append(column)

In [25]:
useful

['allocationPercentage',
 'allocationPercentageDecimals',
 'bidToCoverRatio',
 'competitiveAccepted',
 'competitiveBidDecimals',
 'competitiveTendered',
 'directBidderAccepted',
 'directBidderTendered',
 'fimaNoncompetitiveAccepted',
 'fimaNoncompetitiveTendered',
 'highPrice',
 'indirectBidderAccepted',
 'indirectBidderTendered',
 'maximumCompetitiveAward',
 'maximumNoncompetitiveAward',
 'maximumSingleBid',
 'minimumBidAmount',
 'minimumToIssue',
 'multiplesToBid',
 'multiplesToIssue',
 'nlpExclusionAmount',
 'nlpReportingThreshold',
 'noncompetitiveAccepted',
 'offeringAmount',
 'pricePer100',
 'primaryDealerAccepted',
 'primaryDealerTendered',
 'somaAccepted',
 'somaHoldings',
 'somaTendered',
 'totalAccepted',
 'totalTendered']

Also add auctionDateYear, because we do not need it

In [26]:
df =  df.loc[:, df.columns.isin(useful)]

In [27]:
df

Unnamed: 0,allocationPercentage,allocationPercentageDecimals,bidToCoverRatio,competitiveAccepted,competitiveBidDecimals,competitiveTendered,directBidderAccepted,directBidderTendered,fimaNoncompetitiveAccepted,fimaNoncompetitiveTendered,...,noncompetitiveAccepted,offeringAmount,pricePer100,primaryDealerAccepted,primaryDealerTendered,somaAccepted,somaHoldings,somaTendered,totalAccepted,totalTendered
0,58.77,2,2.73,49914293300,3,136168100000,972232500,4365000000,0,0,...,86183000,50000000000,99.396833,23585745000,97960000000,0,12399000000,0,50000476300,136254283000
1,2.88,2,3.00,62654771500,3,192824878700,1422880000,5520000000,15000000,15000000,...,2331737800,65000000000,98.677972,23254040000,123885000000,5470793800,12399000000,5470793800,70472303100,200642410300
2,72.92,2,3.02,54032912000,3,171074712000,811460000,5175000000,1500000000,1500000000,...,2467427000,58000000000,97.340778,17006680000,113255000000,4881631300,12399000000,4881631300,62881970300,179923770300
3,41.37,2,2.53,66804501000,3,174106691000,1975275000,6565000000,0,0,...,3195543300,70000000000,99.618625,28852535000,127565000000,786630400,2369000000,786630400,70786674700,178088864700
4,64.18,2,2.61,57561890000,3,154156250000,1400000000,6100000000,1038000000,1038000000,...,1400126100,60000000000,99.207083,27938760000,116760000000,674254600,2369000000,674254600,60674270700,157268630700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,17.72,2,4.09,22549550000,3,96606870000,2986630000,8130000000,250000000,250000000,...,1200736000,24000000000,99.989889,14044240000,80300000000,0,5938000000,0,24000286000,98057606000
4336,55.07,2,4.11,23576028500,3,101439000000,2412571000,7895000000,500000000,500000000,...,924525700,25000000000,99.934278,14207520000,81375000000,0,5938000000,0,25000554200,102863525700
4337,85.42,2,5.50,15749798000,3,87827728000,385000000,8010000000,0,0,...,250580400,16000000000,99.998056,6680140000,67678000000,2418028000,2418000000,2418028000,18418406400,90496336400
4338,32.27,2,4.34,23736478500,3,107118316000,2125215000,5645000000,50000000,50000000,...,1213724700,25000000000,99.979778,11584235000,85650000000,0,2418000000,0,25000203200,108382040700


In [28]:
df.columns

Index(['allocationPercentage', 'allocationPercentageDecimals',
       'bidToCoverRatio', 'competitiveAccepted', 'competitiveBidDecimals',
       'competitiveTendered', 'directBidderAccepted', 'directBidderTendered',
       'fimaNoncompetitiveAccepted', 'fimaNoncompetitiveTendered', 'highPrice',
       'indirectBidderAccepted', 'indirectBidderTendered',
       'maximumCompetitiveAward', 'maximumNoncompetitiveAward',
       'maximumSingleBid', 'minimumBidAmount', 'minimumToIssue',
       'multiplesToBid', 'multiplesToIssue', 'nlpExclusionAmount',
       'nlpReportingThreshold', 'noncompetitiveAccepted', 'offeringAmount',
       'pricePer100', 'primaryDealerAccepted', 'primaryDealerTendered',
       'somaAccepted', 'somaHoldings', 'somaTendered', 'totalAccepted',
       'totalTendered'],
      dtype='object')

Now normalize each column

In [29]:
df['pricePer100'].dtype

dtype('float64')

subtract 100 from pricePer100

In [30]:
df_z_scaled = df.copy()
  
# apply normalization technique to Column 1
for column in list(df_z_scaled.columns):
    df_z_scaled[column] = (df_z_scaled[column] - df_z_scaled[column].mean()) / df_z_scaled[column].std()    

# view normalized data  
display(df_z_scaled)

Unnamed: 0,allocationPercentage,allocationPercentageDecimals,bidToCoverRatio,competitiveAccepted,competitiveBidDecimals,competitiveTendered,directBidderAccepted,directBidderTendered,fimaNoncompetitiveAccepted,fimaNoncompetitiveTendered,...,noncompetitiveAccepted,offeringAmount,pricePer100,primaryDealerAccepted,primaryDealerTendered,somaAccepted,somaHoldings,somaTendered,totalAccepted,totalTendered
0,0.358186,,-1.104907,1.249581,,0.370189,-0.931762,-0.612407,-0.812043,-0.812043,...,-0.876154,1.162283,-0.613334,0.744230,-0.062884,-0.568880,1.625579,-0.568880,0.968662,0.293919
1,-1.562789,,-0.774703,2.362309,,2.311185,-0.609612,-0.343810,-0.767669,-0.767669,...,3.447747,2.446294,-1.947691,0.689795,1.074012,1.953192,1.625579,1.953192,2.575147,2.434232
2,0.844531,,-0.750244,1.609293,,1.566049,-1.046692,-0.424040,3.625382,3.625382,...,3.709022,1.847089,-4.429802,-0.335425,0.607852,1.681584,1.625579,1.681584,1.979511,1.745528
3,-0.239863,,-1.349503,2.724739,,1.669921,-0.214728,-0.100794,-0.812043,-0.812043,...,5.111038,2.874298,-0.201642,1.608534,1.235393,-0.206239,-0.344101,-0.206239,2.599816,1.684534
4,0.544131,,-1.251665,1.917507,,0.986442,-0.625968,-0.208930,2.258655,2.258655,...,1.653894,2.018291,-0.965550,1.458579,0.761558,-0.258045,-0.344101,-0.258045,1.806266,0.992453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,-1.052729,,0.558342,-1.140401,,-0.985134,0.508249,0.263150,-0.072472,-0.072472,...,1.269960,-1.063337,0.487501,-0.821574,-0.837333,-0.568880,0.356775,-0.568880,-1.071649,-0.975769
4336,0.231015,,0.582802,-1.050750,,-0.819590,0.097877,0.208500,0.667099,0.667099,...,0.738107,-0.977736,0.384276,-0.794779,-0.790191,-0.568880,0.356775,-0.568880,-0.993155,-0.816016
4337,1.274164,,2.282741,-1.734278,,-1.285897,-1.351550,0.235244,-0.812043,-0.812043,...,-0.559601,-1.748143,0.502661,-2.030056,-1.390850,0.545846,-0.334479,0.545846,-1.509675,-1.227111
4338,-0.552636,,0.864087,-1.036737,,-0.625024,-0.107542,-0.314741,-0.664129,-0.664129,...,1.294971,-0.977736,0.468733,-1.225272,-0.602718,-0.568880,-0.334479,-0.568880,-0.993182,-0.632576


In [31]:
#df.dropna(axis='columns')
#Drop allocationPercentageDecimals and competitiveBidDecimals

Try with sklearn

In [32]:
df2 = df.copy()
  
# apply normalization technique to Column 1
for column in list(df2.columns):
    df2[column] = (df2[column] - df2[column].mean())    
  
# view normalized data  
display(df2)

Unnamed: 0,allocationPercentage,allocationPercentageDecimals,bidToCoverRatio,competitiveAccepted,competitiveBidDecimals,competitiveTendered,directBidderAccepted,directBidderTendered,fimaNoncompetitiveAccepted,fimaNoncompetitiveTendered,...,noncompetitiveAccepted,offeringAmount,pricePer100,primaryDealerAccepted,primaryDealerTendered,somaAccepted,somaHoldings,somaTendered,totalAccepted,totalTendered
0,10.421281,0.0,-0.903457,1.430742e+10,0.0,1.080564e+10,-1.303420e+09,-2.633424e+09,-2.744980e+08,-2.744980e+08,...,-4.550177e+08,1.357795e+10,-0.330423,4.535096e+09,-1.433967e+09,-1.233996e+09,8.277769e+09,-1.233996e+09,1.234391e+10,8.842124e+09
1,-45.468719,0.0,-0.633457,2.704790e+10,0.0,6.746241e+10,-8.527725e+08,-1.478424e+09,-2.594980e+08,-2.594980e+08,...,1.790537e+09,2.857795e+10,-1.049284,4.203391e+09,2.449103e+10,4.236798e+09,8.277769e+09,4.236798e+09,3.281573e+10,7.323025e+10
2,24.571281,0.0,-0.613457,1.842604e+10,0.0,4.571225e+10,-1.464192e+09,-1.823424e+09,1.225502e+09,1.225502e+09,...,1.926226e+09,2.157795e+10,-2.386478,-2.043969e+09,1.386103e+10,3.647635e+09,8.277769e+09,3.647635e+09,2.522540e+10,5.251161e+10
3,-6.978719,0.0,-1.103457,3.119763e+10,0.0,4.874423e+10,-3.003775e+08,-4.334244e+08,-2.744980e+08,-2.744980e+08,...,2.654343e+09,3.357795e+10,-0.108631,9.801886e+09,2.817103e+10,-4.473657e+08,-1.752231e+09,-4.473657e+08,3.313010e+10,5.067671e+10
4,15.831281,0.0,-1.023457,2.195501e+10,0.0,2.879379e+10,-8.756525e+08,-8.984244e+08,7.635020e+08,7.635020e+08,...,8.589254e+08,2.357795e+10,-0.520173,8.888111e+09,1.736603e+10,-5.597415e+08,-1.752231e+09,-5.597415e+08,2.301770e+10,2.985647e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,-30.628719,0.0,0.456543,-1.305733e+10,0.0,-2.875559e+10,7.109775e+08,1.131576e+09,-2.449800e+07,-2.449800e+07,...,6.595353e+08,-1.242205e+10,0.262633,-5.006409e+09,-1.909397e+10,-1.233996e+09,1.816769e+09,-1.233996e+09,-1.365628e+10,-2.935455e+10
4336,6.721281,0.0,0.476543,-1.203085e+10,0.0,-2.392346e+10,1.369185e+08,8.965756e+08,2.255020e+08,2.255020e+08,...,3.833250e+08,-1.142205e+10,0.207022,-4.843129e+09,-1.801897e+10,-1.233996e+09,1.816769e+09,-1.233996e+09,-1.265602e+10,-2.454863e+10
4337,37.071281,0.0,1.866543,-1.985708e+10,0.0,-3.753474e+10,-1.890652e+09,1.011576e+09,-2.744980e+08,-2.744980e+08,...,-2.906203e+08,-2.042205e+10,0.270800,-1.237051e+10,-3.171597e+10,1.184032e+09,-1.703231e+09,1.184032e+09,-1.923816e+10,-3.691582e+10
4338,-16.078719,0.0,0.706543,-1.187040e+10,0.0,-1.824415e+10,-1.504375e+08,-1.353424e+09,-2.244980e+08,-2.244980e+08,...,6.725240e+08,-1.142205e+10,0.252522,-7.466414e+09,-1.374397e+10,-1.233996e+09,-1.703231e+09,-1.233996e+09,-1.265637e+10,-1.903012e+10


In [33]:

from sklearn.preprocessing import MinMaxScaler
import numpy as np
  
# copy the data
df_sklearn = df.copy()
  
# apply normalization techniques
for column in list(df_sklearn.columns):
    df_sklearn[column] = MinMaxScaler().fit_transform(np.array(df_sklearn[column]).reshape(-1,1))
  
# view normalized data  
display(df_sklearn)


Unnamed: 0,allocationPercentage,allocationPercentageDecimals,bidToCoverRatio,competitiveAccepted,competitiveBidDecimals,competitiveTendered,directBidderAccepted,directBidderTendered,fimaNoncompetitiveAccepted,fimaNoncompetitiveTendered,...,noncompetitiveAccepted,offeringAmount,pricePer100,primaryDealerAccepted,primaryDealerTendered,somaAccepted,somaHoldings,somaTendered,totalAccepted,totalTendered
0,0.588012,0.0,0.079493,0.563280,0.0,0.481872,0.084059,0.1746,0.0000,0.0000,...,0.019079,0.555432,0.878998,0.518589,0.522438,0.000000,0.529193,0.000000,0.530195,0.476148
1,0.028720,0.0,0.110599,0.707128,0.0,0.682540,0.123021,0.2208,0.0075,0.0075,...,0.516194,0.722145,0.734787,0.511288,0.660864,0.601035,0.529193,0.601035,0.747382,0.701347
2,0.729611,0.0,0.112903,0.609782,0.0,0.605505,0.070158,0.2070,0.7500,0.7500,...,0.546232,0.644346,0.466532,0.373779,0.604106,0.536309,0.529193,0.536309,0.666856,0.628883
3,0.413890,0.0,0.056452,0.753981,0.0,0.616243,0.170781,0.2626,0.0000,0.0000,...,0.707421,0.777716,0.923492,0.634515,0.680513,0.086421,0.101110,0.086421,0.750717,0.622465
4,0.642150,0.0,0.065668,0.649627,0.0,0.545583,0.121043,0.2440,0.5190,0.5190,...,0.309956,0.666574,0.840932,0.614402,0.622820,0.074075,0.101110,0.074075,0.643434,0.549646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,0.177224,0.0,0.236175,0.254316,0.0,0.341754,0.258222,0.3252,0.1250,0.1250,...,0.265816,0.266463,0.997972,0.308574,0.428144,0.000000,0.253436,0.000000,0.254356,0.342555
4336,0.550986,0.0,0.238479,0.265905,0.0,0.358869,0.208589,0.3158,0.2500,0.2500,...,0.204669,0.277577,0.986815,0.312167,0.433884,0.000000,0.253436,0.000000,0.264968,0.359364
4337,0.854698,0.0,0.398618,0.177543,0.0,0.310660,0.033287,0.3204,0.0000,0.0000,...,0.055473,0.177549,0.999610,0.146484,0.360749,0.265651,0.103201,0.265651,0.195137,0.316109
4338,0.322826,0.0,0.264977,0.267717,0.0,0.378984,0.183745,0.2258,0.0250,0.0250,...,0.268691,0.277577,0.995943,0.254427,0.456710,0.000000,0.103201,0.000000,0.264964,0.378665


In [34]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                  hidden_layer_sizes=(15,), random_state=1)

drop any nans

In [35]:
df_sklearn.dropna(how='all', inplace=True, axis='rows')

In [36]:
#clf.fit(X,y)

In [37]:
#clf.fit(X, y)

In [38]:
#X

In [39]:
df_z_scaled.dropna(axis='rows', how='all', inplace=True)
#Drop allocationPercentageDecimals and competitiveBidDecimals

In [40]:
df_z_scaled.dropna(axis='columns', how='all', inplace=True)

In [41]:
df_z_scaled.fillna(0.0, inplace=True)

In [42]:
df_z_scaled

Unnamed: 0,allocationPercentage,bidToCoverRatio,competitiveAccepted,competitiveTendered,directBidderAccepted,directBidderTendered,fimaNoncompetitiveAccepted,fimaNoncompetitiveTendered,highPrice,indirectBidderAccepted,...,noncompetitiveAccepted,offeringAmount,pricePer100,primaryDealerAccepted,primaryDealerTendered,somaAccepted,somaHoldings,somaTendered,totalAccepted,totalTendered
0,0.358186,-1.104907,1.249581,0.370189,-0.931762,-0.612407,-0.812043,-0.812043,-0.613334,1.319830,...,-0.876154,1.162283,-0.613334,0.744230,-0.062884,-0.568880,1.625579,-0.568880,0.968662,0.293919
1,-1.562789,-0.774703,2.362309,2.311185,-0.609612,-0.343810,-0.767669,-0.767669,-1.947691,2.823864,...,3.447747,2.446294,-1.947691,0.689795,1.074012,1.953192,1.625579,1.953192,2.575147,2.434232
2,0.844531,-0.750244,1.609293,1.566049,-1.046692,-0.424040,3.625382,3.625382,-4.429802,2.613768,...,3.709022,1.847089,-4.429802,-0.335425,0.607852,1.681584,1.625579,1.681584,1.979511,1.745528
3,-0.239863,-1.349503,2.724739,1.669921,-0.214728,-0.100794,-0.812043,-0.812043,-0.201642,2.585397,...,5.111038,2.874298,-0.201642,1.608534,1.235393,-0.206239,-0.344101,-0.206239,2.599816,1.684534
4,0.544131,-1.251665,1.917507,0.986442,-0.625968,-0.208930,2.258655,2.258655,-0.965550,1.661452,...,1.653894,2.018291,-0.965550,1.458579,0.761558,-0.258045,-0.344101,-0.258045,1.806266,0.992453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4335,-1.052729,0.558342,-1.140401,-0.985134,0.508249,0.263150,-0.072472,-0.072472,0.487501,-1.044103,...,1.269960,-1.063337,0.487501,-0.821574,-0.837333,-0.568880,0.356775,-0.568880,-1.071649,-0.975769
4336,0.231015,0.582802,-1.050750,-0.819590,0.097877,0.208500,0.667099,0.667099,0.384276,-0.872833,...,0.738107,-0.977736,0.384276,-0.794779,-0.790191,-0.568880,0.356775,-0.568880,-0.993155,-0.816016
4337,1.274164,2.282741,-1.734278,-1.285897,-1.351550,0.235244,-0.812043,-0.812043,0.502661,-0.666832,...,-0.559601,-1.748143,0.502661,-2.030056,-1.390850,0.545846,-0.334479,0.545846,-1.509675,-1.227111
4338,-0.552636,0.864087,-1.036737,-0.625024,-0.107542,-0.314741,-0.664129,-0.664129,0.468733,-0.506870,...,1.294971,-0.977736,0.468733,-1.225272,-0.602718,-0.568880,-0.334479,-0.568880,-0.993182,-0.632576


In [43]:
from sklearn.model_selection import train_test_split

In [44]:
y = df_z_scaled["pricePer100"].copy().to_numpy()
X=df_z_scaled.copy().drop(columns=['pricePer100']).to_numpy()


In [45]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.10, random_state=42)

In [46]:
X_train

array([[-0.94342994, -0.66463548, -0.50720899, ..., -0.56888035,
        -0.60082952, -1.15580092],
       [-1.39196674,  1.01084396, -1.12886611, ..., -0.56888035,
        -1.07165669, -0.6760137 ],
       [ 1.46354573, -1.06821812,  0.84703461, ...,  1.79810856,
         1.21459815,  0.33375545],
       ...,
       [-1.23111216, -1.27612433,  1.0304384 , ..., -0.31952843,
         1.0110702 ,  0.08281839],
       [-1.28198071, -0.51787816, -0.07484787, ..., -0.56888035,
        -0.12999559, -0.39370155],
       [-0.77673235, -0.77470347,  1.50116283, ...,  2.61671317,
         1.82477375,  1.38695693]])

In [47]:
from sklearn.linear_model import LinearRegression

In [48]:
model = LinearRegression().fit(X_train, y_train)

In [49]:
model.intercept_

-7.676151381197371e-17

In [50]:
model.coef_

array([-1.39310836e-16,  2.22044605e-16, -6.34369061e-03, -1.32364285e-02,
       -5.84336655e-04, -5.65919271e-03, -6.47387001e-03,  5.54722344e-03,
        1.00000000e+00, -3.50540621e-03, -1.60246554e-02,  1.97177639e-05,
       -5.43198888e-16, -2.12794147e-02,  7.34867368e-16, -1.91612235e-14,
       -1.42364510e-03,  2.12596970e-02, -2.54544163e-03, -3.00103896e-02,
       -4.71202453e-03, -3.17454396e-16, -1.23427441e-03,  1.23834264e-02,
        5.32333641e-02])

In [51]:
model.score(X_train, y_train)

1.0

In [52]:
model.score(X_test, y_test)

1.0

In [53]:
model.predict(X_test)

array([-7.04288730e-01,  4.54655754e-01, -1.20634128e+00,  4.97606573e-01,
        3.65507606e-01, -3.43508024e+00, -3.07930075e-02,  4.99772769e-01,
        1.63746973e-01,  4.35887625e-01,  4.07736360e-01,  3.18586356e-01,
       -1.45411919e+00,  3.09201363e-01,  5.00854938e-01,  2.57589473e-01,
        4.82808875e-01, -4.66747313e-02, -4.16221306e-02,  4.21811992e-01,
        2.57589473e-01,  4.71079491e-01,  2.24743855e-01, -1.08903815e+00,
        4.95442233e-01, -1.49725006e+00,  4.82808875e-01,  3.09923428e-01,
       -1.72716196e+00,  2.76357602e-01,  4.89847619e-01,  4.54837662e-01,
        4.64400851e-01,  1.02750090e-01,  4.99230756e-01, -4.97430448e-02,
        5.04825369e-01,  4.79200405e-01,  3.79583238e-01,  4.96910494e-01,
        4.78116379e-01,  4.84613110e-01, -1.09366409e-03, -9.86053901e-03,
       -9.43180468e-02,  4.82808875e-01, -1.92436753e-02,  2.60115773e-01,
       -2.82001191e-01,  4.71079491e-01,  3.84275735e-01,  1.23378640e-02,
       -4.83759968e-01, -

In [54]:
y_test

array([-7.04288730e-01,  4.54655754e-01, -1.20634128e+00,  4.97606573e-01,
        3.65507606e-01, -3.43508024e+00, -3.07930075e-02,  4.99772769e-01,
        1.63746973e-01,  4.35887625e-01,  4.07736360e-01,  3.18586356e-01,
       -1.45411919e+00,  3.09201363e-01,  5.00854938e-01,  2.57589473e-01,
        4.82808875e-01, -4.66747313e-02, -4.16221306e-02,  4.21811992e-01,
        2.57589473e-01,  4.71079491e-01,  2.24743855e-01, -1.08903815e+00,
        4.95442233e-01, -1.49725006e+00,  4.82808875e-01,  3.09923428e-01,
       -1.72716196e+00,  2.76357602e-01,  4.89847619e-01,  4.54837662e-01,
        4.64400851e-01,  1.02750090e-01,  4.99230756e-01, -4.97430448e-02,
        5.04825369e-01,  4.79200405e-01,  3.79583238e-01,  4.96910494e-01,
        4.78116379e-01,  4.84613110e-01, -1.09366409e-03, -9.86053901e-03,
       -9.43180468e-02,  4.82808875e-01, -1.92436753e-02,  2.60115773e-01,
       -2.82001191e-01,  4.71079491e-01,  3.84275735e-01,  1.23378640e-02,
       -4.83759968e-01, -

In [55]:
model

LinearRegression()

In [56]:
>>> from sklearn.neural_network import MLPClassifier
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split

In [57]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.10, random_state=42)

In [58]:
y_train

array([-0.34660654,  0.45465575, -3.8902134 , ...,  0.28538063,
       -0.54475685,  0.45934825])

In [59]:
df3 = df_z_scaled

In [60]:
y = df3["pricePer100"].copy().to_numpy()
X=df3.copy().drop(columns=['pricePer100']).to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)


In [61]:
from sklearn.neural_network import MLPRegressor   
model = MLPRegressor(solver='lbfgs',alpha=0.001,hidden_layer_sizes=(10,10), max_iter=1000000)

model.fit(X_train,y_train)

y_pred = model.predict(X_test)

In [62]:
model.score(X_test, y_test)

0.9999949939357471

In [63]:
model.score(X_train, y_train)

0.9999965302825528

In [64]:
X.shape

(3052, 25)

In [65]:
y.shape

(3052,)

In [66]:
y

array([-0.61333449, -1.94769072, -4.42980221, ...,  0.50266103,
        0.46873324,  0.33735448])

In [67]:
X_future = numpy.array([[-1.03783051, -1.03783051,  1.38859309, -1.03783051, -1.03783051,
         0.83765871],
       [ 0.70953718,  0.70953718,  1.10523178,  0.70953718,  0.70953718,
         1.31815933]])

NameError: name 'numpy' is not defined

In [None]:
y_future = numpy.array([-0.86290779,  0.02113125])

In [None]:
model.predict(X_future)

In [None]:
X_train[0].size