-
Notifications
You must be signed in to change notification settings - Fork 4
/
test1.py
62 lines (42 loc) · 1.57 KB
/
test1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
__author__ = 'aravindan'
import json
import gc
import csv
from scipy import linalg as LAS
import numpy as np
from numpy import linalg as LA
def unique_list(seq):
    """Return the items of ``seq`` with duplicates removed, in first-seen order.

    Args:
        seq: Any iterable of hashable items.

    Returns:
        A new list holding each distinct item once, positioned where it
        first appeared in ``seq``.

    Raises:
        TypeError: If an item is unhashable (it cannot be stored in the
            tracking set).
    """
    seen = set()
    unique_items = []
    for item in seq:
        if item not in seen:
            # Record the item before keeping it so later repeats are skipped.
            seen.add(item)
            unique_items.append(item)
    return unique_items
# Kept from the original script; automatic collection is normally already on.
gc.enable()

# NOTE: earlier revisions loaded all three dataset files up front with
# readlines(), and also pointed at "Dataset/user_temp.json",
# "Dataset/review_temp.json" and "Dataset/business_temp.json" instead of the
# full dataset files -- presumably smaller samples for quick runs (confirm).


def _load_unique_ids(path, key):
    """Return the distinct ``key`` values of a JSON-lines file, in file order.

    Args:
        path: Path of a file holding one JSON object per line.
        key: Name of the field to read from every object.

    Returns:
        A list of the field values with duplicates removed, keeping the
        position of each value's first occurrence.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
        KeyError: If an object does not contain ``key``.
    """
    ids = []
    # The context manager closes the file even if parsing fails, and iterating
    # the handle streams the file instead of holding every line in memory.
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Lines keep their newline, so comparing against "" never
                # filtered blank lines; strip first so they are skipped.
                continue
            ids.append(json.loads(line)[key])
    return unique_list(ids)


users = _load_unique_ids("Dataset/yelp_academic_dataset_user.json", "user_id")
businesses = _load_unique_ids(
    "Dataset/yelp_academic_dataset_business.json", "business_id"
)
reviews = []


def _first_positions(ids):
    """Map each id to the index of its first occurrence in ``ids``."""
    positions = {}
    for position, identifier in enumerate(ids):
        # setdefault keeps the first index, matching what list.index returns.
        positions.setdefault(identifier, position)
    return positions


# Build the id -> row/column lookups once: calling list.index for every review
# rescans the whole id list each time, which makes the loop quadratic.
user_row = _first_positions(users)
business_col = _first_positions(businesses)

# Dense users x businesses matrix of star ratings; cells without a review stay 0.
# NOTE(review): the allocation is len(users) * len(businesses) integers, which
# can be very large for the full dataset -- confirm it fits in memory.
rating_matrix = np.zeros((len(users), len(businesses)), dtype=int)

# Stream the review file and close it reliably instead of leaking the handle.
with open("Dataset/yelp_academic_dataset_review.json") as review_file:
    for jsonstr in review_file:
        jsonstr = jsonstr.strip()
        if not jsonstr:
            # Lines keep their newline, so the old `!= ""` test never skipped
            # blank lines and json.loads failed on them.
            continue
        jsonobj = json.loads(jsonstr)
        # A review naming an id absent from the user/business files still
        # aborts the run (now with KeyError rather than ValueError).
        row = user_row[jsonobj["user_id"]]
        col = business_col[jsonobj["business_id"]]
        # If the same user reviews a business more than once, the last
        # review in the file wins.
        rating_matrix[row, col] = jsonobj["stars"]

np.savetxt('Data1.csv', rating_matrix, delimiter=",")
gc.collect()