
modified files

kykamath committed Nov 16, 2012
1 parent 8ec9503 commit 945d1e708ff4f088cd29243fdc0d653f105af596
Showing with 23 additions and 15 deletions.
  1. +13 −7 data_analysis/mr_analysis_nov_12.py
  2. +10 −8 data_analysis/plots_nov_12.py
data_analysis/mr_analysis_nov_12.py
@@ -21,6 +21,8 @@
MIN_HASHTAG_OCCURRENCES_PER_LOCATION = 5
+TIME_UNIT_IN_SECONDS = 60*10 # 10-minute buckets
+
# Start time for data analysis
START_TIME, END_TIME = datetime(2011, 3, 1), datetime(2012, 9, 30)
@@ -34,7 +36,8 @@
MIN_HASHTAG_OCCURRENCES = MIN_HASHTAG_OCCURRENCES,
HASHTAG_STARTING_WINDOW = HASHTAG_STARTING_WINDOW,
HASHTAG_ENDING_WINDOW = HASHTAG_ENDING_WINDOW,
- MIN_HASHTAG_OCCURRENCES_PER_LOCATION = MIN_HASHTAG_OCCURRENCES_PER_LOCATION
+ MIN_HASHTAG_OCCURRENCES_PER_LOCATION = MIN_HASHTAG_OCCURRENCES_PER_LOCATION,
+ TIME_UNIT_IN_SECONDS = TIME_UNIT_IN_SECONDS
)
@@ -222,7 +225,10 @@ def mapper1(self, key, hashtag_object):
ltuo_location_and_occurrence_time =\
[(location, min(items, key=itemgetter(0))[0]) for location, items in ltuo_location_and_items]
for location, occurrence_time in ltuo_location_and_occurrence_time:
- self.mf_location_to_ltuo_hashtag_and_min_occ_time[location].append([hashtag, occurrence_time])
+ self.mf_location_to_ltuo_hashtag_and_min_occ_time[location].append([
+ hashtag,
+ GeneralMethods.approximateEpoch(occurrence_time, TIME_UNIT_IN_SECONDS)
+ ])
for neighbor_location, _ in ltuo_location_and_occurrence_time:
if location!=neighbor_location:
self.mf_location_to_neighbor_locations[location].add(neighbor_location)
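For context, GeneralMethods.approximateEpoch is not defined in this diff. A minimal sketch of the bucketing it presumably performs (flooring an epoch timestamp to the start of its TIME_UNIT_IN_SECONDS bucket) is below; the helper name approximate_epoch and the sample timestamps are illustrative only, and the committed implementation may differ.

TIME_UNIT_IN_SECONDS = 60*10  # 10-minute buckets, matching the constant added above

def approximate_epoch(epoch, time_unit_in_seconds):
    # Truncate the timestamp to the lower boundary of its bucket.
    return int(epoch / time_unit_in_seconds) * time_unit_in_seconds

# Two occurrences 599 seconds apart fall into the same 600-second bucket:
assert approximate_epoch(1321401000, TIME_UNIT_IN_SECONDS) == 1321401000
assert approximate_epoch(1321401599, TIME_UNIT_IN_SECONDS) == 1321401000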
@@ -272,15 +278,15 @@ def reducer2(self, location, it_tuo_neighbor_location_and_ltuo_hashtag_and_min_o
hashtags = set(zip(*ltuo_hashtag_and_min_occ_time)[0])
neighbor_hashtags = set(zip(*neighbor_ltuo_hashtag_and_min_occ_time)[0])
num_common_hashtags = len(hashtags.intersection(neighbor_hashtags)) + 0.0
- if num_common_hashtags>50:
+ if num_common_hashtags>100:
similarity_and_lag_object = {'location': location, 'neighbor_location': neighbor_location}
- similarity_and_lag_object['haversine_distance'] = self._haversine_distance(location, neighbor_location)
+ similarity_and_lag_object['haversine_distance'] =\
+ self._haversine_distance(location, neighbor_location)
similarity_and_lag_object['similarity'] =\
- self._similarity(ltuo_hashtag_and_min_occ_time, neighbor_ltuo_hashtag_and_min_occ_time)
+ self._similarity(ltuo_hashtag_and_min_occ_time, neighbor_ltuo_hashtag_and_min_occ_time)
similarity_and_lag_object['adoption_lag'] =\
- self._adoption_lag(ltuo_hashtag_and_min_occ_time, neighbor_ltuo_hashtag_and_min_occ_time)
+ self._adoption_lag(ltuo_hashtag_and_min_occ_time, neighbor_ltuo_hashtag_and_min_occ_time)
yield '', similarity_and_lag_object
-
def steps(self):
return self.get_dense_hashtags.get_jobs() +\
[self.mr(mapper=self.mapper1, mapper_final=self.mapper_final1, reducer=self.reducer1)]+\
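The bodies of _similarity and _adoption_lag lie outside this diff. Given that the reducer compares lists of [hashtag, bucketed first-occurrence time] pairs, one plausible reading (an assumption, not the committed code) is a Jaccard-style overlap and a mean absolute lag over the shared hashtags:

def _similarity(ltuo_a, ltuo_b):
    # Assumed: Jaccard overlap of the two locations' hashtag sets.
    hashtags_a, hashtags_b = set(zip(*ltuo_a)[0]), set(zip(*ltuo_b)[0])
    return len(hashtags_a & hashtags_b) / float(len(hashtags_a | hashtags_b))

def _adoption_lag(ltuo_a, ltuo_b):
    # Assumed: mean absolute gap, in seconds, between the bucketed first
    # occurrences of the hashtags both locations share. The reducer only
    # reaches here when the locations share >100 hashtags, so the set of
    # common hashtags is non-empty.
    mf_a, mf_b = dict(ltuo_a), dict(ltuo_b)
    common_hashtags = set(mf_a) & set(mf_b)
    total_lag = sum(abs(mf_a[h] - mf_b[h]) for h in common_hashtags)
    return total_lag / float(len(common_hashtags))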
data_analysis/plots_nov_12.py
@@ -117,21 +117,23 @@ def top_k_locations_on_world_map():
savefig(output_file)
@staticmethod
def _plot_affinities(type):
- TIME_UNIT_IN_SECONDS = 60*10
+# TIME_UNIT_IN_SECONDS = 60*10
mf_distance_to_affinity_scores = defaultdict(list)
for similarity_and_lag_object in\
FileIO.iterateJsonFromFile(f_dense_hashtags_similarity_and_lag, remove_params_dict=True):
distance=int(similarity_and_lag_object['haversine_distance']/100)*100+100
mf_distance_to_affinity_scores[distance].append(similarity_and_lag_object[type])
- ltuo_distance_and_num_samples = [(distance, len(affinity_scores)) for distance, affinity_scores in mf_distance_to_affinity_scores.iteritems()]
- for distance, num_samples in ltuo_distance_and_num_samples:
- print distance, num_samples
- exit()
+# ltuo_distance_and_num_samples = [(distance, len(affinity_scores)) for distance, affinity_scores in mf_distance_to_affinity_scores.iteritems()]
+# ltuo_distance_and_num_samples.sort(key=itemgetter(0))
+# for distance, num_samples in ltuo_distance_and_num_samples:
+# print distance, num_samples
+# exit()
ltuo_distance_and_affinity_score = [(distance, np.mean(affinity_scores))
for distance, affinity_scores in mf_distance_to_affinity_scores.iteritems()
if len(affinity_scores)>100]
x_distances, y_affinity_scores = zip(*sorted(ltuo_distance_and_affinity_score, key=itemgetter(0)))
- if type=='adoption_lag': y_affinity_scores = [y*TIME_UNIT_IN_SECONDS/(60.*60.) for y in y_affinity_scores]
+ if type=='adoption_lag':
+ y_affinity_scores = [y/(60.*60.) for y in y_affinity_scores]
plt.figure(num=None, figsize=(6,3))
plt.subplots_adjust(bottom=0.2, top=0.9, wspace=0, hspace=0)
x_distances, y_affinity_scores = splineSmooth(x_distances, y_affinity_scores)
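As a quick check of the distance binning above (assuming haversine_distance is in kilometers, which the 100-unit step suggests), int(d/100)*100+100 labels each 100 km bin by its upper edge:

for d in (0.0, 99.9, 100.0, 437.2):
    print int(d/100)*100 + 100  # prints 100, 100, 200, 500

Note also the adoption-lag rescaling in this hunk: with occurrence times now stored as bucketed epochs in seconds, the mean lag comes back in seconds, so the old y*TIME_UNIT_IN_SECONDS factor is dropped and y/(60.*60.) converts directly to hours.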
@@ -160,8 +162,8 @@ def run():
# DataAnalysis.hashtag_locations_distribution_loglog()
# DataAnalysis.fraction_of_occurrences_vs_rank_of_location()
# DataAnalysis.top_k_locations_on_world_map()
- DataAnalysis.content_affinity_vs_distance()
-# DataAnalysis.temporal_affinity_vs_distance()
+# DataAnalysis.content_affinity_vs_distance()
+ DataAnalysis.temporal_affinity_vs_distance()
if __name__ == '__main__':
DataAnalysis.run()
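With this commit, rerunning the script regenerates the temporal-affinity plot instead of the content-affinity one; since the module ends with the DataAnalysis.run() guard above, something like python data_analysis/plots_nov_12.py (invocation assumed, not shown in the diff) should be all that is needed.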
