In [1]:
import cornac
import pandas as pd
import tqdm
import sys
import time
import datetime
import os
from cornac.data import Reader
from cornac.eval_methods import BaseMethod
from cornac.utils import cache
from cornac.models import MF, NMF, BPR,SVD,MostPop,SKMeans
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [8]:
def code_data_city(data_city):
    data_city=data_city[["user_name","rate","unique_rest","date"]].reset_index(drop=True)
    place_code=data_city.unique_rest.unique()
    place_code=pd.DataFrame({
        "unique_rest":place_code,
        "place_code":[i for i in range(len(place_code))]   
    })
    user_code=data_city.user_name.unique()
    user_code=pd.DataFrame({
        "user_name":user_code,
        "user_code":[i for i in range(len(user_code))]   
    })
    data_city=data_city.merge(place_code, on="unique_rest")
    data_city=data_city.merge(user_code, on="user_name")
    return data_city
def train_test(data,period):
    #во время
    if (period=="intime"):
        test_set=data[data["date"]>=1584653263]#20 марта
        test_set=test_set[test_set["date"]<=1596230863]#1 августа
        train_set=data[data["date"]<1584653263]#20 марта
        train_set=train_set.reset_index(drop=True)
        test_set=test_set[["place_code","rate","date","user_code"]]
        test_set=test_set[test_set.user_code.isin(train_set.user_code.unique())].reset_index(drop=True)
        train_set=train_set[["place_code","rate","date","user_code"]]
    #обычное время послекорона
    if(period=="after"):
        test_set=data[data["date"]>1596230863]#1 августа
        test_set=test_set[test_set["date"]<=1601501263]#1 октября
        train_set=data[data["date"]<=1596230863]#1 августа
        train_set=train_set.reset_index(drop=True)
        test_set=test_set[["place_code","rate","date","user_code"]]
        test_set=test_set[test_set.user_code.isin(train_set.user_code.unique())].reset_index(drop=True)
        train_set=train_set[["place_code","rate","date","user_code"]]
    #обычное время до корона
    if(period=="before"):
        test_set=data[data["date"]>=1577827663]#1 января
        test_set=test_set[test_set["date"]<=1584653263]#20 марта
        train_set=data[data["date"]<1577827663]#1 января
        train_set=train_set.reset_index(drop=True)
        users=test_set.groupby(['user_code']).count()
        users=users[users.place_code>1].reset_index()
        test_set=test_set[test_set.user_code.isin(users.user_code)]
        test_set=test_set[["place_code","rate","date","user_code"]]
        test_set=test_set[test_set.user_code.isin(train_set.user_code.unique())].reset_index(drop=True)
        train_set=train_set[["place_code","rate","date","user_code"]] 
    return train_set,test_set
def create_set(t_set):
    items=list(t_set.place_code)
    users=list(t_set.user_code)  
    ratings=list(t_set.rate)
    t_list=[(users[i],items[i],ratings[i]) for i in range(len(t_set))]
    return t_list
def create_csv_file_with_metric(city,time_str):
    files=os.listdir()
    for name in files:
        if ("CornacExp" in name):
            with open(name) as f:
                f = f.readlines()
            os.remove(name)
    new_lines=[]
    metric_names=f[3].replace("\n","").split("|")[1:]
    for line in f[5:]:
        new_lines.append(line.replace("\n","").split("|")[:])
    df=pd.DataFrame()
    df["alorithm"]=[new_lines[j][0] for j in range(len(new_lines))]
    for i in range(1,len(metric_names)+0):
        df[metric_names[i-1]]=[float(new_lines[j][i]) for j in range(len(new_lines))]
    df.to_csv("cornac_"+city+"_"+time_str+".csv")

In [9]:
def models_initialization():
    gmf = cornac.models.GMF(num_factors=8,num_epochs=10,learner="adam",batch_size=256,lr=0.001,num_neg=50,seed=123)
    mlp = cornac.models.MLP(layers=[64, 32, 16, 8],act_fn="tanh",learner="adam",num_epochs=10,batch_size=256,lr=0.001,
        num_neg=50,seed=123)
    neumf1 = cornac.models.NeuMF(num_factors=8,layers=[64, 32, 16, 8],act_fn="tanh",learner="adam",num_epochs=10,
        batch_size=256,lr=0.001,num_neg=50,seed=123)
    neumf2 = cornac.models.NeuMF(name="NeuMF_pretrained",learner="adam",num_epochs=10,batch_size=256,lr=0.001,
        num_neg=50,seed=123,num_factors=gmf.num_factors,layers=mlp.layers,act_fn=mlp.act_fn,).pretrain(gmf, mlp)
    vaecf = cornac.models.VAECF(k=10,autoencoder_structure=[20],act_fn="tanh",likelihood="mult",n_epochs=100,
        batch_size=100,learning_rate=0.001,beta=1.0,seed=123,use_gpu=True,verbose=True)
    return vaecf, gmf, mlp, neumf1,neumf2

In [10]:
def create_data_for_city(city,time_str):
    data_city=data[data.city==city]
    data_city=code_data_city(data_city)
    train_set,test_set=train_test(data_city,time_str)
    train_data=create_set(train_set)
    test_data=create_set(test_set)
    return train_data,test_data

In [35]:
city="saint"
data_city=data[data.city==city]
data_city=code_data_city(data_city)
train_set,test_set=train_test(data_city,"before")
train_set=train_set[["place_code","rate","user_code"]]
data_city=data_city[["unique_rest","place_code"]].drop_duplicates().reset_index(drop=True)
data_city=data_city.merge(data).drop_duplicates("place_code")[["place_code","rest_name","Завтрак","Бранч"]]

In [36]:
restaurants=pd.read_csv("restaurants.csv")

In [37]:
restaurants=restaurants.rename(columns={"name":"rest_name"})
data_city=data_city.merge(restaurants)[["place_code","rest_name","Завтрак","Бранч","price_icon","cuisine"]]

In [295]:
data_city[data_city.place_code==1135].drop_duplicates("rest_name")

Unnamed: 0,place_code,rest_name,Завтрак,Бранч,price_icon,cuisine
1994,1135,Sushi Booffet,0,0,$,"Японская, Азиатская, Гавайская"


In [38]:
data_city[data_city.place_code.isin([816,758,760,504,974,5])].drop_duplicates("rest_name")

Unnamed: 0,place_code,rest_name,Завтрак,Бранч,price_icon,cuisine
7,5,Кафе Италия,1,1,$$ - $$$,"Итальянская, Пицца, Кафе, Средиземноморская, Е..."
979,760,Бабагануш,1,0,$$ - $$$,"Израильская, Ближневосточная, Фастфуд"
1060,816,Rockets & Bishops,0,0,$$ - $$$,"Бар, Фастфуд, Гастропаб, Американская, Паб, За..."
1864,758,HopHead Tap Room,0,0,$$ - $$$,"Паб с пивоварней, Бар, Пицца, Паб, Европейская"
2000,504,Redrum,0,0,$$ - $$$,"Итальянская, Стейк-хаус, Паб с пивоварней, Фас..."
2070,974,Компания Family,1,0,$$ - $$$,"Итальянская, Азиатская, Японская, Суши"


In [15]:
city="saint"
data_city=data[data.city==city]
data_city=code_data_city(data_city)
train_set,test_set=train_test(data_city,"before")
train_set=train_set[["place_code","rate","user_code"]]
new=0

In [16]:
train_set.loc[len(train_set)]=[2,4,new]
train_set.loc[len(train_set)]=[1052,5,new]
train_set.loc[len(train_set)]=[747,5,new]
train_set.loc[len(train_set)]=[1069,4,new]
train_set.loc[len(train_set)]=[1057,5,new]
train_set.loc[len(train_set)]=[1135,4,new]
train_set.loc[len(train_set)]=[607,3,new]
train_data=create_set(train_set)

In [17]:
user_dict=dict(zip(train_set.user_code.unique(),list(range(0,len(train_set.user_code.unique())))))
item_dict=dict(zip(train_set.place_code.unique(),list(range(0,len(train_set.place_code.unique())))))
train_data=cornac.data.Dataset.build(train_data,global_uid_map=user_dict,global_iid_map=item_dict)

In [33]:
search_age = 644
for name, age in item_dict.items():  # for name, age in dictionary.iteritems():  (for Python 2.x)
    if age == search_age:
        print(name)

974


In [18]:
vaecf = cornac.models.VAECF(k=10,autoencoder_structure=[20],act_fn="tanh",likelihood="mult",n_epochs=100,
        batch_size=100,learning_rate=0.001,beta=1.0,seed=123,use_gpu=True,verbose=True)
vaecf.fit(train_data)

  0%|          | 0/100 [00:00<?, ?it/s]

<cornac.models.vaecf.recom_vaecf.VAECF at 0x9a33395088>

In [21]:
user_dict[0]

21039

In [22]:
ratings=vaecf.score(user_dict[0])
rat_dict=dict(zip(list(range(0,len(ratings))),ratings))

In [23]:
sorted(rat_dict.items(), key=lambda x: x[1], reverse=True)

[(346, 0.009379377),
 (770, 0.009370342),
 (123, 0.008155578),
 (159, 0.007124443),
 (457, 0.00675044),
 (130, 0.006666696),
 (24, 0.006280658),
 (578, 0.0051400037),
 (644, 0.004854817),
 (569, 0.00473655),
 (505, 0.0047208266),
 (46, 0.0045694807),
 (256, 0.004501829),
 (604, 0.0042499104),
 (606, 0.004248815),
 (600, 0.004076876),
 (375, 0.004055103),
 (19, 0.0039938),
 (544, 0.0038821446),
 (62, 0.0038234994),
 (613, 0.0038214184),
 (574, 0.0037004983),
 (707, 0.0036995788),
 (733, 0.003697779),
 (382, 0.0036936908),
 (160, 0.0036450238),
 (137, 0.003596236),
 (947, 0.0035048993),
 (299, 0.003482068),
 (795, 0.003469536),
 (322, 0.0033966103),
 (252, 0.0033244635),
 (586, 0.003263445),
 (816, 0.0030808866),
 (394, 0.0030592484),
 (597, 0.003037041),
 (378, 0.0030349523),
 (355, 0.003022282),
 (247, 0.0030112755),
 (784, 0.002997463),
 (265, 0.0029329439),
 (624, 0.0028639415),
 (149, 0.0028612418),
 (83, 0.0028566306),
 (772, 0.002821568),
 (598, 0.0028093208),
 (450, 0.0028014972)

In [61]:
model=MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123).fit(train_data)

In [69]:
model.score(1)

array([4.7578993, 4.098787 , 3.21524  , ..., 4.8753915, 3.3676336,
       2.411663 ], dtype=float32)

In [51]:
vaecf = cornac.models.VAECF(k=10,autoencoder_structure=[20],act_fn="tanh",likelihood="mult",n_epochs=100,
        batch_size=100,learning_rate=0.001,beta=1.0,seed=123,use_gpu=True,verbose=True)
models = [MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123)
        #vaecf,
    ]
model=models[0]
    # define metrics to evaluate the models
metrics = [ Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]
    # Instantiate models
#exp=cornac.Experiment(eval_method=eval_method, models=models, metrics=metrics, user_based=True)
#exp.run()
test_result, val_result = eval_method.evaluate(
                model=model,
                metrics=metrics,
                user_based=self.user_based,
                show_validation=self.show_validation,
            )

NameError: name 'self' is not defined

In [33]:
for r in exp.result:
    print(r.model_name)
    user_results = r.metric_user_results # <- this is a dictionary

MF


In [48]:
print(exp.val_result)

None


In [44]:
vaecf.fit(train_data)

AttributeError: 'list' object has no attribute 'reset'

In [6]:
def lerning_and_testing(city,time_str):
    train_data,test_data=create_data_for_city(city,time_str)
    eval_method = BaseMethod.from_splits(
        train_data=train_data, test_data=test_data, exclude_unknowns=False, verbose=False)
    vaecf, gmf, mlp, neumf1,neumf2=models_initialization()
    models = [
        MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123),
        NMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123),
        BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123),
        SKMeans(k=10, max_iter=25, tol=1e-10,seed=123),
        SVD(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123),
        vaecf,
        gmf,
        mlp,
        neumf1,
        neumf2,
        MostPop(),
    ]
    # define metrics to evaluate the models
    metrics = [ Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]
    # Instantiate models
    cornac.Experiment(eval_method=eval_method, models=models, metrics=metrics, user_based=True).run()
    create_csv_file_with_metric(city,time_str)

In [39]:
data=pd.read_csv("FINAL_REVWS_ALL.csv",index_col='Unnamed: 0')
cities=['NY','Rome','Stockholm','Los','saint']
data=data[data.city.isin(cities)].reset_index(drop=True)
data.date=[time.mktime(datetime.datetime.strptime(s, "%Y-%m-%d").timetuple()) for s in data.date]
data=data.rename(columns={"rating":"rate"})

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
time_str="before"
for city in cities[1:]: 
    print(city)
    lerning_and_testing(city,time_str)

Rome
Iter 1, likelihood: 7202.044910
Iter 2, likelihood: 16433.551557
Iter 3, likelihood: 16492.158416
Iter 4, likelihood: 16502.900064
Iter 5, likelihood: 16509.619952
Iter 6, likelihood: 16515.643428
Iter 7, likelihood: 16519.251127
Iter 8, likelihood: 16520.310934
Iter 9, likelihood: 16526.268393
Iter 10, likelihood: 16527.444290
Iter 11, likelihood: 16527.700361
Iter 12, likelihood: 16527.708020
Iter 13, likelihood: 16527.709971


  0%|          | 0/100 [00:00<?, ?it/s]

  import pandas.util.testing as tm


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]


TEST:
...
                 |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
---------------- + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF               | 0.5191 | 0.0022 |  0.0008 |       0.0003 |    0.0006 |    4.0394 |   8.6962
NMF              | 0.5220 | 0.0025 |  0.0012 |       0.0005 |    0.0017 |    4.3320 |   5.8582
BPR              | 0.7008 | 0.0125 |  0.0129 |       0.0054 |    0.0195 |   11.7314 |   4.8434
Skmeans          | 0.6432 | 0.0070 |  0.0077 |       0.0039 |    0.0126 |   22.8640 |   7.0233
SVD              | 0.5198 | 0.0024 |  0.0010 |       0.0004 |    0.0012 |    1.9301 |   5.7682
VAECF            | 0.7430 | 0.0176 |  0.0209 |       0.0096 |    0.0340 | 4817.5110 |   5.9508
GMF              | 0.6946 | 0.0124 |  0.0127 |       0.0053 |    0.0191 | 2145.1966 |   5.4030
MLP              | 0.6359 | 0.0087 |  0.0094 |       0.0041 |    0.0142 | 2506.8596 |   9.1577
NeuMF            | 0.5688 | 0.0071 |  0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]


TEST:
...
                 |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
---------------- + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF               | 0.5102 | 0.0102 |  0.0045 |       0.0028 |    0.0092 |    0.0781 |   0.2031
NMF              | 0.5153 | 0.0115 |  0.0078 |       0.0033 |    0.0138 |    0.4219 |   0.2344
BPR              | 0.6127 | 0.0259 |  0.0228 |       0.0077 |    0.0277 |    0.7914 |   0.1870
Skmeans          | 0.5520 | 0.0175 |  0.0188 |       0.0077 |    0.0330 |    1.4689 |   0.3356
SVD              | 0.5108 | 0.0097 |  0.0034 |       0.0022 |    0.0064 |    0.1961 |   0.2031
VAECF            | 0.6132 | 0.0279 |  0.0264 |       0.0088 |    0.0309 |   99.3569 |   0.3126
GMF              | 0.6048 | 0.0267 |  0.0274 |       0.0110 |    0.0391 |  213.9186 |   0.2656
MLP              | 0.6027 | 0.0268 |  0.0266 |       0.0094 |    0.0350 |  225.4524 |   0.3281
NeuMF            | 0.5769 | 0.0227 |  0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]


TEST:
...
                 |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
---------------- + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF               | 0.5051 | 0.0204 |  0.0177 |       0.0111 |    0.0370 |    0.0156 |   0.0312
NMF              | 0.5145 | 0.0174 |  0.0018 |       0.0028 |    0.0023 |    0.1875 |   0.0312
BPR              | 0.5706 | 0.0353 |  0.0357 |       0.0167 |    0.0486 |    0.3125 |   0.0312
Skmeans          | 0.4887 | 0.0309 |  0.0239 |       0.0083 |    0.0301 |    0.6893 |   0.0469
SVD              | 0.5068 | 0.0213 |  0.0234 |       0.0139 |    0.0509 |    0.0920 |   0.0470
VAECF            | 0.5667 | 0.0306 |  0.0293 |       0.0167 |    0.0417 |   31.4548 |   0.0469
GMF              | 0.5602 | 0.0313 |  0.0305 |       0.0139 |    0.0394 |   81.2346 |   0.0469
MLP              | 0.5509 | 0.0460 |  0.0473 |       0.0167 |    0.0417 |   85.2960 |   0.0781
NeuMF            | 0.5849 | 0.0373 |  0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]


TEST:
...
                 |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
---------------- + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF               | 0.5445 | 0.0096 |  0.0055 |       0.0018 |    0.0091 |    0.0625 |   0.2187
NMF              | 0.5292 | 0.0069 |  0.0010 |       0.0006 |    0.0020 |    0.5938 |   0.2188
BPR              | 0.6380 | 0.0231 |  0.0247 |       0.0097 |    0.0349 |    1.0469 |   0.1875
Skmeans          | 0.5849 | 0.0109 |  0.0084 |       0.0036 |    0.0126 |    2.3944 |   0.3281
SVD              | 0.5442 | 0.0095 |  0.0051 |       0.0018 |    0.0091 |    0.2813 |   0.1875
VAECF            | 0.6326 | 0.0211 |  0.0227 |       0.0103 |    0.0400 |  185.2531 |   0.2970
GMF              | 0.6527 | 0.0259 |  0.0279 |       0.0103 |    0.0410 |  294.0087 |   0.2502
MLP              | 0.6348 | 0.0258 |  0.0268 |       0.0091 |    0.0329 |  318.0753 |   0.3438
NeuMF            | 0.6311 | 0.0167 |  0

In [50]:
time_str="before"
df_one=pd.DataFrame({"city":[],"time":[],"var":[]})
for time_str in ["before","intime","after"]:
    for city in cities: 
        data_city=data[data.city==city]
        data_city=code_data_city(data_city)
        train_set,test_set=train_test(data_city,time_str)
        df_one.loc[len(df_one)]=[city,time_str,test_set.rate.var()]

In [52]:
df_one.to_csv("var.csv",sep=";")