/
model0.py
53 lines (37 loc) · 1.51 KB
/
model0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas
from sklearn.linear_model import LogisticRegression
def train(csv_path="MasterKickstarter.csv"):
    """Fit a logistic-regression model that predicts Kickstarter success.

    Only rows whose ``status`` is "successful" or "failed" and whose ``goal``
    is positive are used. Four features are built from each row: ``goal``,
    the length of ``name``, the length of ``blurb`` and ``Length_of_kick``.
    Each feature is divided by its column maximum before fitting.

    After fitting, the per-feature scale is folded into ``coef_`` so that the
    returned model accepts *unscaled* feature values (in the column order
    goal, name length, blurb length, days). Without this, callers that pass
    raw values to ``predict_proba`` would be scoring against coefficients
    fitted on max-scaled data.

    Args:
        csv_path: Path of the CSV file to train on.

    Returns:
        The fitted ``LogisticRegression`` instance, with ``coef_`` expressed
        per unit of the raw (unscaled) features.

    Side effects:
        Prints the accuracy of the model on its own training data.
    """
    data = pandas.read_csv(csv_path)
    finished = data["status"].isin(["successful", "failed"])
    data = data[finished & (data["goal"] > 0)]

    features = pandas.DataFrame(
        {
            "goal": data["goal"],
            # Missing text cells are read as NaN; count them as length 0
            # instead of failing on len(float).
            "name_length": data["name"].fillna("").str.len(),
            "description_length": data["blurb"].fillna("").str.len(),
            "days": data["Length_of_kick"],
        }
    )
    results = (data["status"] == "successful").astype(int)

    # Scale every column by its maximum; an all-zero column is left as-is
    # rather than being divided by zero.
    scale = features.max().replace(0, 1)
    scaled = features / scale

    c = LogisticRegression().fit(scaled, results)
    print(f"Score is {c.score(scaled, results)}")

    # w . (x / m) + b == (w / m) . x + b, so dividing the coefficients by the
    # scale makes the model equivalent on raw inputs.
    c.coef_ = c.coef_ / scale.to_numpy(dtype=float)
    return c
def preProcessing(input):
    """Build the single-row feature matrix for one prediction request.

    Looks up "goal", "name", "blurb" and "Length_of_kick" on ``input`` with
    ``.get`` and converts each value with ``int``.

    NOTE(review): "name" and "blurb" are passed through ``int``, so they
    presumably already hold lengths rather than text - confirm with callers.

    Args:
        input: Mapping-like object exposing ``.get``.

    Returns:
        A list containing one list of four ints, in the order
        goal, name, blurb, Length_of_kick.
    """
    field_order = ("goal", "name", "blurb", "Length_of_kick")
    row = [int(input.get(field)) for field in field_order]
    return [row]
def predict(input):
    """Return the model's class probabilities for one request.

    The request is converted to a feature row with ``preProcessing`` and
    scored with ``predict_proba`` on the model returned by ``train``.

    The model is trained on the first call only and then reused: fitting
    reads the whole training CSV, which is far too expensive to repeat for
    every prediction.

    Args:
        input: Mapping-like object accepted by ``preProcessing``.

    Returns:
        The result of ``predict_proba`` for the single prepared row.
    """
    # Validate/convert the request first so bad input fails before any
    # training work is done.
    prepared_input = preProcessing(input)

    model = getattr(predict, "_model", None)
    if model is None:
        model = train()
        predict._model = model  # cache on the function for later calls

    return model.predict_proba(prepared_input)