Permalink
Browse files

modified experiments

  • Loading branch information...
1 parent 236dd42 commit 1e1630c49b12f4d2ddf6d439946335eaea636840 @kykamath committed Nov 9, 2012
Showing with 69 additions and 18 deletions.
  1. +47 −0 data_analysis/analysis_nov_12.py
  2. +16 −13 data_analysis/mr_analysis_nov_12.py
  3. +4 −1 data_analysis/settings.py
  4. +2 −4 settings.py
@@ -0,0 +1,47 @@
+'''
+Created on Nov 9, 2012
+
+@author: krishnakamath
+'''
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from library.file_io import FileIO
+from library.mrjobwrapper import runMRJob
+from mr_analysis_nov_12 import DataStats
+from mr_analysis_nov_12 import PARAMS_DICT
+from pprint import pprint
+from settings import hdfs_input_folder
+from settings import f_data_stats
+import time
+
+class MRAnalysis():
+ @staticmethod
+ def get_input_files_with_tweets(startTime, endTime, folderType='world'):
+ current=startTime
+ while current<=endTime:
+ input_file = hdfs_input_folder%folderType+'%s_%s'%(current.year, current.month)
+ print input_file
+ yield input_file
+ current+=relativedelta(months=1)
+ @staticmethod
+ def run_job(mr_class, output_file, input_files_start_time, input_files_end_time):
+ PARAMS_DICT['input_files_start_time'] = time.mktime(input_files_start_time.timetuple())
+ PARAMS_DICT['input_files_end_time'] = time.mktime(input_files_end_time.timetuple())
+ print 'Running map reduce with the following params:', pprint(PARAMS_DICT)
+ runMRJob(mr_class,
+ output_file,
+ MRAnalysis.get_input_files_with_tweets(input_files_start_time, input_files_end_time),
+ jobconf={'mapred.reduce.tasks':500})
+ FileIO.writeToFileAsJson(PARAMS_DICT, output_file)
+ @staticmethod
+ def data_stats(input_files_start_time, input_files_end_time):
+ mr_class = DataStats
+ output_file = f_data_stats
+ MRAnalysis.run_job(mr_class, output_file, input_files_start_time, input_files_end_time)
+ @staticmethod
+ def run():
+ input_files_start_time, input_files_end_time = datetime(2011, 2, 1), datetime(2012, 8, 31)
+ MRAnalysis.data_stats(input_files_start_time, input_files_end_time)
+
+if __name__ == '__main__':
+ MRAnalysis.run()
@@ -3,22 +3,25 @@
@author: krishnakamath
'''
-'''
-Created on May 7, 2012
-
-@author: krishnakamath
-'''
+from datetime import datetime
from itertools import chain
-import cjson, time
from library.mrjobwrapper import ModifiedMRJob
from library.twitter import getDateTimeObjectFromTweetTimestamp
-#from library.geo import getLatticeLid, getLocationFromLid, getRadiusOfGyration
-#from library.classes import GeneralMethods
-#from collections import defaultdict
-#from datetime import datetime
-#from library.stats import entropy, focus
-#from operator import itemgetter
-#import numpy as np
+import cjson
+import time
+
+# Start and end of the time window used for data analysis (naive datetimes).
+START_TIME, END_TIME = datetime(2011, 3, 1), datetime(2012, 7, 31)
+
+# Parameters for the MR job that will be logged (epoch seconds; time.mktime reads the time tuple as local time).
+HASHTAG_STARTING_WINDOW = time.mktime(START_TIME.timetuple())
+HASHTAG_ENDING_WINDOW = time.mktime(END_TIME.timetuple())
+
+PARAMS_DICT = dict(
+ PARAMS_DICT = True, # NOTE(review): presumably a marker identifying this record as the parameter dict - confirm
+ HASHTAG_STARTING_WINDOW = HASHTAG_STARTING_WINDOW,
+ HASHTAG_ENDING_WINDOW = HASHTAG_ENDING_WINDOW,
+ )
def iterateHashtagObjectInstances(line):
@@ -33,4 +33,7 @@
f_tuo_valid_focus_lid_pair_and_common_hashtag_affinity_score = fld_data_analysis + '/tuo_valid_focus_lid_pair_and_common_hashtag_affinity_score'
f_tuo_valid_focus_lid_pair_and_temporal_affinity_score = fld_data_analysis + '/tuo_valid_focus_lid_pair_and_temporal_affinity_score'
#f_tuo_lid_and_ltuo_other_lid_and_no_o = fld_data_analysis+'/tuo_iid_and_perct_change_of_occurrences'
-
+
+# Nov 12 analysis
+fld_data_analysis = '/mnt/chevron/kykamath/data/geo/hashtags/data_analysis/'
+f_data_stats = fld_data_analysis+'/data_stats'
View
@@ -49,10 +49,8 @@
hashtagsImagesHastagEvolutionFolder = hashtagsImagesFolder + 'hashtag_evolution/'
hashtagsImagesHastagSharingVsTransmittingProbabilityFolder = hashtagsImagesFolder + 'sharing_vs_transmitting/'
-
-
# Classifiers
hashtagsClassifiersFolder = hashtagsAnalysisFolder+'/classifiers/%s/%s/'
targetSelectionRegressionClassifiersFolder = hashtagsAnalysisFolder+'/ts_classifiers/%s/%s/%s/'
-#Models
-hashtagsModelsFolder = hashtagsAnalysisFolder+'/models/%s/%s/%s/'
+# Models
+hashtagsModelsFolder = hashtagsAnalysisFolder+'/models/%s/%s/%s/'

0 comments on commit 1e1630c

Please sign in to comment.