From 841d015a5f4db61dd6e9c77e8402933e5f58fc6b Mon Sep 17 00:00:00 2001 From: "Luam C. Totti" Date: Fri, 20 Jun 2014 16:01:14 +0300 Subject: [PATCH] Changing type to float32, adding some missing methods for analysis and cleaning some queries. --- dbmanager.py | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/dbmanager.py b/dbmanager.py index 781e5df..c15df1b 100644 --- a/dbmanager.py +++ b/dbmanager.py @@ -97,12 +97,11 @@ def save_features(self, table, image_id, features) : self.db.commit() cursor.close() - - def get_repins(self, small=False, min_repins=0, sample=None) : + def get_repins(self, sample=None) : c = self.db.cursor() - query = "SELECT id, nRepins FROM pins WHERE nRepins>=%d AND useit=1" % (min_repins) + query = "SELECT id, nRepins FROM pins WHERE useit=1" c.execute(query) rows = c.fetchall() c.close() @@ -139,7 +138,7 @@ def get_features(self, table, columns, ids) : if not rows: return [], [], {} - data = np.empty((len(ids), len(columns)), dtype=np.float64) + data = np.empty((len(ids), len(columns)), dtype=np.float32) for i, pin_id in enumerate(ids): data[i,:] = rows_map[pin_id] @@ -218,7 +217,6 @@ def get_data_aesthetics(self, aes_filter, ids): ''' Read the aesthetic features from the database. ''' - # if (not cache_file or not os.path.exists(cache_file)) : data = [] for table, columns in aes_filter.items() : @@ -231,8 +229,8 @@ def get_data_aesthetics(self, aes_filter, ids): data = np.hstack(data) return features, data - - + + def get_data_semantics(self, concepts, ids): ''' Read the semantic concepts from the files. 
@@ -320,6 +318,42 @@ def get_data_social(self, ids) : data = vec.fit_transform(data).toarray() return vec.get_feature_names(), data + + def get_pins_info(self, ids=None): + c = self.db.cursor() + + c.execute("""SELECT p.id, u.id, u.nFollowers, b.nFollowers + FROM pins p JOIN users u ON p.user_id = u.id + JOIN boards b ON p.board_id = b.id + WHERE p.useit=1 AND u.nFollowers>0""") + rows = c.fetchall() + c.close() + + pins = {} + for pid, uid, ufollowers, bfollowers in rows: + if ids==None or (pid in ids) : + pins[pid] = (uid, ufollowers, bfollowers) + + return pins + + + def get_repins_mean_and_std(self): + c = self.db.cursor() + + c.execute("""select u.nFollowers, COUNT(1), AVG(p.nRepins), STD(p.nRepins) + from pins p join users u on p.user_id = u.id + where u.nFollowers > 0 + group by user_id""") + + rows = c.fetchall() + c.close() + followers, npins, mean_repins, std_repins = zip(*rows) + return np.asarray(followers, int), \ + np.asarray(npins, int), \ + np.asarray(mean_repins, float), \ + np.asarray(std_repins, float) + + def close(self) : self.db.close()