Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Code cleaning.

  • Loading branch information...
commit 3daa69419af972f4ab21b1a838b5d80daa393463 1 parent 0ae65ff
@julosaure authored
Showing with 18 additions and 14 deletions.
  1. +2 −2 lcFormat.py
  2. +16 −12 preprocessData.py
View
4 lcFormat.py
@@ -4,7 +4,7 @@
# nb of columns/features per record
NB_COL=42
-# "employment length","code","initial listing status"] "fico range" "loan id" "status"
+# "employment length","code","initial listing status"] "fico range" "loan id"
fields = ["loan id","amount requested","amount funded by investors","interest rate","loan length","application date","application expiration date","issued date","credit grade","loan title","loan purpose","loan description","monthly payment","status","total amount funded","debt-to-income ratio","remaining principal funded by investors","payments to date (funded by investors)","remaining principal","payments to date","screen name","city","state","home ownership","monthly income","fico range","earliest credit line","open credit lines","total credit lines","revolving credit balance","revolving line utilization","inquiries in the last 6 months","accounts now delinquent","delinquent amount","delinquencies (last 2 yrs)","months since last delinquency","public records on file","months since last record","education","employment length","code","initial listing status"]
f2id = dict(zip(fields, xrange(NB_COL)))
@@ -22,7 +22,7 @@
fields_categorial = ["loan length","credit grade","loan purpose","state","home ownership"]
-targets = ["fully paid", "charged off", 'default', 'issued', 'current', 'performing payment plan', 'late (16-30 days)', 'late (31-120 days)', 'in review', 'in grace period']
+targets = ["fully paid", "charged off", 'default', 'issued', 'current', 'performing payment plan', 'late (16-30 days)', 'late (31-120 days)', 'in review', 'in grace period', 'loan is being issued']
class lcRecord(dict):
def __str__(self):
View
28 preprocessData.py
@@ -1,5 +1,5 @@
-import sys, argparse, csv
+import sys, argparse, csv, pprint
import numpy as np, sklearn as skl, sklearn.feature_extraction as skl_fe
import lcFormat
@@ -55,20 +55,24 @@ def normalize():
pass
def records2Arrays(records):
- arrays = {}
- arrays["target_names"] = np.array(lcFormat.targets)
- arrays["target"] = np.array([lcFormat.targets.index(rec.target) for rec in records])
+ sk_records = {}
+ sk_records["target_names"] = np.array(lcFormat.targets)
+ sk_records["target"] = np.array([lcFormat.targets.index(rec.target) for rec in records])
dv = skl_fe.DictVectorizer()
- arrays["data"] = dv.fit_transform(records)
- arrays["feature_names"] = dv.get_feature_names()
- print arrays
- return arrays
+ sk_records["data"] = dv.fit_transform(records)
+ sk_records["feature_names"] = dv.get_feature_names()
+
+ #pp = pprint.PrettyPrinter()
+ #pp.pprint(sk_records)
+ print sk_records
+ return sk_records
-def main():
- rows = readCsv(trainData)
+def preprocessData(data):
+ rows = readCsv(data)
records = csv2Records(rows)
records = removeIllFormed(records)
- arrays = records2Arrays(records)
+ sk_records = records2Arrays(records)
+ return sk_records
if __name__ == "__main__":
- main()
+ preprocessData(trainData)
Please sign in to comment.
Something went wrong with that request. Please try again.