In [1]:
from sklearn.pipeline import Pipeline
from steps.feature import ImpactClassifier, ImpactWeigher
from steps.filter import ChampionReleaseFilter
from steps.reader import ReleaseReader, PlayHistoryReader, PatchHistoryReader
from steps.transform import HistoryCombiner, ImpactAggregator, ImpactDiscretizer
import joblib

cr_dict = ReleaseReader.from_csv('data/champions.csv', as_dict=True)
skins_df = ReleaseReader.from_csv('data/skins.csv')
play_df = PlayHistoryReader.from_jsonl('data/playhistory.jsonl')
impact = joblib.load('model/impact_classifier.pickle')

transf = Pipeline([
    ('filter', ChampionReleaseFilter(cr_dict)),
    ('impact_clf', ImpactClassifier(impact)),
    ('impact_w', ImpactWeigher()),
    ('impact_agg', ImpactAggregator()),
    # ('combiner', HistoryCombiner(play_df, skins_df)),
    # ('impact_dsc', ImpactDiscretizer())
])

raw_df = PatchHistoryReader.from_jsonl('data/patches.jsonl')
df = transf.transform(raw_df)
df




Unnamed: 0,patch,date,champion,diff,buff,nerf
0,"April 11, 2009 Patch",2009-04-11,Fiddlesticks,4.500000,4.500000,0.000000
1,"April 11, 2009 Patch",2009-04-11,Jax,-16.000000,0.000000,16.000000
2,"April 11, 2009 Patch",2009-04-11,Master Yi,2.000000,3.000000,1.000000
3,"April 11, 2009 Patch",2009-04-11,Morgana,22.833333,27.500000,4.666667
4,"April 11, 2009 Patch",2009-04-11,Nunu & Willump,3.000000,3.000000,0.000000
...,...,...,...,...,...,...
7336,V9.9,2019-05-01,Sylas,2.000000,2.000000,0.000000
7337,V9.9,2019-05-01,Tahm Kench,16.166667,29.166667,13.000000
7338,V9.9,2019-05-01,Taliyah,0.000000,0.000000,0.000000
7339,V9.9,2019-05-01,Volibear,6.500000,6.500000,0.000000


In [8]:
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from steps.classifier import PatchClassifier

df_ = df
X, y = df_[['popularity', 'winrate', 'banrate', 'total_skins', 'diff']], df_['diff']
a_model = RandomForestClassifier()
c_model = RandomForestClassifier()
model = PatchClassifier(a_model, c_model)

# score = cross_val_score(model, X, y, cv=10, scoring='recall')
# print(f"Cross val score: {score.mean():.2f} (+/- {score.std() * 2:.2f})")
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=0)
model.fit(train_x, train_y)
predicted, y_use = model.predict(test_x)
print(classification_report(test_y[y_use], predicted))

              precision    recall  f1-score   support

      adjust       0.27      0.25      0.26       131
        buff       0.30      0.62      0.41       228
        nerf       0.20      0.22      0.21       152
        none       0.00      0.00      0.00       247

    accuracy                           0.27       758
   macro avg       0.19      0.27      0.22       758
weighted avg       0.18      0.27      0.21       758



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
