The goal here is to look into the people that tweet because of the hashtag. What do they generally look like? Generate descriptive statistics (both network and Twitter based) and contrast them with other user types.

In [1]:
from imports import * 

In [51]:
campaigns = preprocessing.load_campaign()
with open(os.path.join(TWITTER_DATA_DIR, 'exposure_results.pkl'), 'rb') as f:
    exposures = pickle.load(f)

In [52]:
with open(os.path.join(TWITTER_DATA_DIR, 'n_newly_exposed_by_user.pkl'), 'rb') as f:
    new_exposures = pickle.load(f)

new_exposures_df = pd.DataFrame(pd.DataFrame(new_exposures).T.stack())
new_exposures_df.index = new_exposures_df.index.set_names(['hashtag', 'username'])
new_exposures_df.columns = ['newly_exposed_users']
new_exposures_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,newly_exposed_users
hashtag,username,Unnamed: 2_level_1
jharkhandwithmodi,manojgoelbjp,14000.0
jharkhandwithmodi,moonmuks,0.0
jharkhandwithmodi,sanjay_mishra91,1800.0
jharkhandwithmodi,ch_rahul29,930.0
jharkhandwithmodi,shaileshmishra,2000.0


In [53]:
dfs = []
for hashtag, tweets in tqdm(campaigns.items()):
    if hashtag not in preprocessing.bandwagon_hashtags and hashtag not in preprocessing.never_trended:
        _, df = trending.build_df(hashtag, tweets, exposures,
                                  raw_df_too=True, include_missing=False)
        dfs.append(df.merge(new_exposures_df.reset_index(), on=['username', 'hashtag']))
panel_df = pd.concat(dfs)
panel_df.head()

HBox(children=(FloatProgress(value=0.0, max=75.0), HTML(value='')))




Unnamed: 0,username,id,retweet_from,template,retweet_id,text,date,retweets,favorites,adj_date,type,follower_data,template_exposure,normal_exposure,total_exposure,time,trending_start,inferred_trending_start,hashtag,newly_exposed_users
0,bjp4latehar,1172033856607682560,,,,आधारभूत संरचना हो या शिक्षा या फिर स्वच्छता या...,2019-09-12 06:27:01,0,0,2019-09-12 11:57:01,regular,True,151,64,215,2019-09-12 11:57:01,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0
1,bjp4latehar,1172027968400445440,,,,झारखण्ड सरकार किसी भी आपात स्थिति में पीड़ित क...,2019-09-12 06:03:37,0,0,2019-09-12 11:33:37,regular,True,151,64,215,2019-09-12 11:33:37,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0
2,bjp4latehar,1172028765326569472,,,,अब राजपत्रित नौकरियों को छोड़कर सभी सरकारी नौक...,2019-09-12 06:06:47,0,0,2019-09-12 11:36:47,regular,True,151,64,215,2019-09-12 11:36:47,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0
3,bjp4latehar,1172029990159781888,,,,झारखण्ड गठन के बाद पहली बार रघुवर सरकार ने राज...,2019-09-12 06:11:39,0,0,2019-09-12 11:41:39,regular,True,151,64,215,2019-09-12 11:41:39,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0
4,bjp4latehar,1172086387274743809,bjp4jharkhand,,1.17207040415787e+18,rt @bjp4jharkhand: आज का दिन झारखंड के लिए ऐति...,2019-09-12 09:55:45,31,0,2019-09-12 15:25:45,regular_retweet,True,151,64,215,2019-09-12 15:25:45,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0


## Network Level Info

In [54]:
network_user_df = pd.read_pickle(os.path.join(TWITTER_DATA_DIR, 'user_network_stats.pkl'))
network_user_df

Unnamed: 0,pr,cc,outdeg,indeg
0001atulrai,3e-06,1,42,20
0001sudarshan,2.6e-06,0.28,86,48
0006subhash,1.9e-06,0.34,76,6
000fdc0fb7974a7,2e-06,0.55,29,9
000pal,1.9e-06,0.25,1.1e+02,3
...,...,...,...,...
___subodh,2e-06,0.34,4.6e+02,13
____daddysgirl,1.9e-06,0,0,0
____realpatel__,2.2e-06,0.28,2e+02,41
_____anusha,2.6e-06,0.4,1.8e+02,20


In [6]:
# import network_plots, graph_tool.all as gt, gzip
# g = network_plots.load_follower_network()

# pr = gt.pagerank(g).a
# clustering = gt.local_clustering(g).a
# out_degs = g.get_out_degrees(g.get_vertices())
# in_degs = g.get_in_degrees(g.get_vertices())
# usernames = [g.vp.usernames[v] for v in g.vertices()]

# network_user_df = pd.DataFrame([pr,clustering,out_degs,in_degs]).T
# network_user_df.index = usernames
# network_user_df.columns = ['pr', 'cc', 'outdeg', 'indeg']

# network_user_df.to_pickle(os.path.join(TWITTER_DATA_DIR, 'user_network_stats.pkl'))

## Analyzing Users
For each user, how many followers do they have? 

In [57]:
def describe_users(big_df):
    res_dict = {}
    stats = ['# Tweets', '# Followers', '# Campaigns', 'Avg. Retweets', 'Avg. Favorites', 
             'PageRank', 'Clustering Coef.', 'Out-Degree', 'In-Degree', 'Avg. Newly Exposed']
    def get_user_stats(df, mean=False):
        temp = [len(df)]
        username = df.username.iloc[0]
        path = os.path.join(FOLLOWER_DATA_DIR, username + '.gz')
        f = gzip.open(path, 'rb')
        try:
            followers = [x.split('\t')[1] for x in f.read().decode().strip().split('\n')]
            temp.append(len(followers))
        except Exception as e:
            print(e, 'issue with ', username)
            temp.append(-1)
        temp.append(len(df.hashtag.unique()))
        if mean:
            temp.append(df.retweets.mean())
            temp.append(df.favorites.mean())    
        else:
            temp.append(df.retweets.median())
            temp.append(df.favorites.median())    
        temp.append(df.pr.iloc[0])
        temp.append(df.cc.iloc[0])
        temp.append(df.outdeg.iloc[0])
        temp.append(df.indeg.iloc[0])
        temp.append(df.newly_exposed_users.iloc[0])
        res_dict[username] = temp

    big_df.groupby('username').apply(get_user_stats)
    t = pd.DataFrame(res_dict).T
    t.columns = stats
    return t 

In [58]:
big_panel = []
meta_df = []
def helper(df, desc):
    t = describe_users(df)
    t['Group'] = desc 
    big_panel.append(t)
    meta_df.append(
        pd.concat({desc: t.describe()}, names=['User Group']))    

In [59]:
zero_exposure_tweeters = panel_df.query('total_exposure == 0').query('type == "regular"') \
.query('adj_date > inferred_trending_start').join(network_user_df, on='username')

helper(zero_exposure_tweeters, 'Zero-Exposure, Post-Hashtag')

list index out of range issue with  movies_it


In [60]:
zero_exposure_tweeters_pre_trend = panel_df.query('total_exposure == 0').query('type == "regular"') \
    .query('adj_date < inferred_trending_start').join(network_user_df, on='username')
helper(zero_exposure_tweeters_pre_trend,  'Zero-Exposure, Pre-Hashtag')

In [61]:
others = panel_df.query('total_exposure != 0').query('type == "regular"').join(network_user_df, on='username')
helper(others, 'Exposed, Non-Template')

list index out of range issue with  faraktesuresh
list index out of range issue with  kolinilangi
list index out of range issue with  shyamsu08245833


In [62]:
templates = panel_df.query('type == "template"').join(network_user_df, on='username')
helper(templates, 'Template')

list index out of range issue with  faraktesuresh
list index out of range issue with  kajal_barge
list index out of range issue with  kolinilangi
list index out of range issue with  shyamsu08245833


In [64]:
res = pd.concat(meta_df)
res.index = res.index.set_names(['User Type', 'Statistic'])
res

Unnamed: 0_level_0,Unnamed: 1_level_0,# Tweets,# Followers,# Campaigns,Avg. Retweets,Avg. Favorites,PageRank,Clustering Coef.,Out-Degree,In-Degree,Avg. Newly Exposed
User Type,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Zero-Exposure, Post-Hashtag",count,2523.0,2523.0,2523.0,2514.0,2512.0,2523.0,2523.0,2523.0,2523.0,2523.0
"Zero-Exposure, Post-Hashtag",mean,6.152596,25751.07,1.617519,7.739857,22.564695,1.1e-05,0.348934,90.917162,345.810147,15313.59
"Zero-Exposure, Post-Hashtag",std,25.330099,333373.9,1.507568,82.484324,302.432938,6.4e-05,0.277638,205.873272,1837.86265,256600.8
"Zero-Exposure, Post-Hashtag",min,1.0,-1.0,1.0,0.0,0.0,2e-06,0.0,0.0,0.0,0.0
"Zero-Exposure, Post-Hashtag",25%,1.0,63.0,1.0,0.0,0.0,2e-06,0.182336,3.0,4.0,17.0
"Zero-Exposure, Post-Hashtag",50%,2.0,235.0,1.0,0.113074,1.0,2e-06,0.311688,31.0,16.0,82.0
"Zero-Exposure, Post-Hashtag",75%,4.0,1226.5,2.0,1.285714,3.0,3e-06,0.451156,96.5,85.0,443.5
"Zero-Exposure, Post-Hashtag",max,903.0,13106380.0,17.0,2752.0,12109.0,0.002004,2.0,4746.0,49100.0,11346540.0
"Zero-Exposure, Pre-Hashtag",count,913.0,913.0,913.0,911.0,911.0,913.0,913.0,913.0,913.0,913.0
"Zero-Exposure, Pre-Hashtag",mean,2.909091,55628.63,1.07667,15.633041,40.629873,1.7e-05,0.350844,182.382256,553.751369,46706.56


In [65]:
temp = pd.concat(big_panel)

In [67]:
temp.to_pickle(os.path.join(TWITTER_DATA_DIR, 'all_users_by_group_medians.pkl'))
# temp.to_pickle(os.path.join(TWITTER_DATA_DIR, 'all_users_by_group.pkl'))

In [3]:
temp = pd.read_pickle(os.path.join(TWITTER_DATA_DIR, 'all_users_by_group.pkl'))
temp

Unnamed: 0,# Tweets,# Followers,# Campaigns,Avg. Retweets,Avg. Favorites,PageRank,Clustering Coef.,Out-Degree,In-Degree,Avg. Newly Exposed,Group
007_joshh,2.0,40.0,1.0,0.000000,0.000000,0.000002,0.180662,240.0,3.0,13.0,"Zero-Exposure, Post-Hashtag"
1984_tweeter,1.0,9.0,1.0,0.000000,0.000000,0.000002,1.000000,3.0,0.0,5.0,"Zero-Exposure, Post-Hashtag"
1995subhampaul,1.0,159.0,1.0,0.000000,0.000000,0.000002,0.407557,26.0,13.0,71.0,"Zero-Exposure, Post-Hashtag"
1_ndia,6.0,48.0,2.0,0.166667,0.833333,0.000002,0.400000,37.0,8.0,31.0,"Zero-Exposure, Post-Hashtag"
1stindianews,1.0,113070.0,1.0,7.000000,9.000000,0.000037,0.038084,10.0,2198.0,42120.0,"Zero-Exposure, Post-Hashtag"
...,...,...,...,...,...,...,...,...,...,...,...
yuva4namo1,2.0,1.0,1.0,0.000000,0.500000,0.000002,0.300000,5.0,0.0,0.0,Template
yv4bjp,14.0,2248.0,4.0,0.357143,0.571429,0.000003,0.485781,68.0,218.0,140.0,Template
zahidpatka,16.0,3231.0,5.0,1.250000,1.812500,0.000007,0.173755,0.0,439.0,295.0,Template
zalaji_9999,2.0,51.0,1.0,0.000000,3.500000,0.000002,0.479273,102.0,12.0,8.0,Template


In [4]:
user_df = preprocessing.load_user_data()

In [5]:
from tableone import TableOne

In [15]:
cats = ['# Followers', '# Tweets', 'Avg. Retweets', 'Avg. Favorites',
        'PageRank', 'Clustering Coef.', 'Out-Degree', 'In-Degree', 'Avg. Newly Exposed', 
        '# Campaigns']

In [63]:
temp.loc[:, 'PageRank'] = temp.PageRank.apply(lambda x: x*1_000_000)

In [64]:
r = TableOne(temp.reset_index().drop(columns=['index']), columns=[], categorical=[],
             nonnormal=cats, 
             groupby='Group')

In [65]:
pd.set_option('display.float_format', '{:.2g}'.format)

In [66]:
r.tableone.drop(columns=[('Grouped by Group', 'Missing')])

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by Group,Grouped by Group,Grouped by Group,Grouped by Group,Grouped by Group
Unnamed: 0_level_1,Unnamed: 1_level_1,Overall,"Exposed, Non-Template",Template,"Zero-Exposure, Post-Hashtag","Zero-Exposure, Pre-Hashtag"
n,,23020,16298,3286,2523,913
"# Tweets, median [Q1,Q3]",,"3.0 [1.0,8.0]","3.0 [1.0,8.0]","4.0 [1.0,13.0]","2.0 [1.0,4.0]","1.0 [1.0,2.0]"
"# Followers, median [Q1,Q3]",,"454.0 [120.0,2012.0]","512.0 [144.0,2159.5]","381.5 [78.0,1745.8]","235.0 [63.0,1226.5]","383.0 [98.0,1883.0]"
"# Campaigns, median [Q1,Q3]",,"1.0 [1.0,2.0]","2.0 [1.0,3.0]","1.0 [1.0,2.0]","1.0 [1.0,2.0]","1.0 [1.0,1.0]"
"Avg. Retweets, median [Q1,Q3]",,"0.5 [0.0,2.0]","0.5 [0.0,2.1]","0.4 [0.0,2.0]","0.1 [0.0,1.3]","0.1 [0.0,2.0]"
"Avg. Favorites, median [Q1,Q3]",,"1.2 [0.2,4.2]","1.5 [0.3,5.0]","1.0 [0.1,3.4]","1.0 [0.0,3.0]","1.0 [0.0,3.6]"
"PageRank, median [Q1,Q3]",,"2382111.5 [2006431.5,4060326.1]","2449981.9 [2030809.6,4252913.6]","2401580.2 [2019210.7,3998883.3]","2098892.0 [1925456.8,3108848.1]","2271029.5 [1961231.8,3674371.0]"
"Clustering Coef., median [Q1,Q3]",,"0.3 [0.2,0.4]","0.3 [0.2,0.4]","0.4 [0.3,0.5]","0.3 [0.2,0.5]","0.3 [0.2,0.4]"
"Out-Degree, median [Q1,Q3]",,"184.5 [64.0,442.0]","230.0 [98.0,517.0]","174.0 [52.0,440.0]","31.0 [3.0,96.5]","70.0 [12.0,204.0]"
"In-Degree, median [Q1,Q3]",,"44.0 [10.0,186.0]","51.0 [13.0,201.0]","46.0 [9.0,200.0]","16.0 [4.0,85.0]","28.0 [6.0,145.0]"


In [35]:
def split_column_name(x):
    if ',' in x:
        tok = x.split(',')
        new_str = '\\\\'.join(tok[:2])
        return '\\parbox{3cm}{' + new_str + '}'
    else:
        return x

def replace_substrings(text, replacements):
    for query, repl in replacements.items():
        new_text = text.replace(query, repl)
    return new_text
    
def reformat_tableone(tex):
    # function that takes a tex string and splits the std. errors or [q1,q3] onto a new line
    new_tex = ''
    new_lines = []
    n_lines = len(tex.split('\n'))
    for i, line in enumerate(tex.split('\n')):
        if i < 6 or n_lines-i <= 3:
            if i is 3:
                temp = [split_column_name(x) for x in line.split('&')]
                new_lines.append('&'.join(temp) + '\\\\')
            else:
                new_lines.append(line)
        else:
            mod_line, extra_line = [], []
            for word in line.split('&'):
                if '[' in word:
                    beg, end = word.index('['), word.index(']')
                    mod_line.append(word[:beg])
                    extra_line.append('('+ word[beg+1:end] + ')')
                else:
                    mod_line.append(word)
                    extra_line.append('{}')
            new_lines.append('&'.join(mod_line) + '\\\\')
            new_lines.append('&'.join(extra_line)+ '\\\\')
            
    return '\n'.join(new_lines)

In [40]:
replacements = {'\\multicolumn{5}{l}{Grouped by Group}' : '\\multicolumn{5}{c}{Grouped by Group} \\cr \\cline{2-7}'}

In [46]:
with open(os.path.join(ASSETS_DIR, 'user_descriptions.tex'), 'w') as f:
    print(reformat_tableone(replace_substrings(
        r.tableone.drop(columns=[('Grouped by Group', 'Missing')]).to_latex(), replacements))
          , file=f)

# Examining The Disjoint

In [15]:
zero_exposure_tweeters.query('outdeg == 0')

Unnamed: 0_level_0,username,id,retweet_from,template,retweet_id,text,date,retweets,favorites,adj_date,...,normal_exposure,total_exposure,time,trending_start,inferred_trending_start,hashtag,pr,cc,outdeg,indeg
adj_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-02-17 14:33:29,bjp4ichagarh,1097058944114089985,,,,झारखंड को प्रधानमंत्री जी की एक और सौगात #jhar...,2019-02-17 09:03:29,1,1,2019-02-17 14:33:29,...,0,0,2019-02-17 14:33:29,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.435574,0.0,85.0
2019-09-12 14:57:24,rajeevsondhibjp,1172079249953443840,,,,"आज आदिवासी बच्चों की, युवाओं की शिक्षा और उनके...",2019-09-12 09:27:24,0,1,2019-09-12 14:57:24,...,0,0,2019-09-12 14:57:24,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000008,0.206692,0.0,374.0
2019-02-17 14:21:04,bjp4ichagarh,1097055819063906304,,,,#jharkhand में 2014 तक सिर्फ 16 mw सौर ऊजा का ...,2019-02-17 08:51:04,1,1,2019-02-17 14:21:04,...,0,0,2019-02-17 14:21:04,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.435574,0.0,85.0
2019-09-12 14:54:25,rajeevsondhibjp,1172078502453006343,,,,हमने प्रधानमंत्री आवास योजना के माध्यम से 2 कर...,2019-09-12 09:24:25,0,4,2019-09-12 14:54:25,...,0,0,2019-09-12 14:54:25,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000008,0.206692,0.0,374.0
2019-09-12 14:58:39,rajeevsondhibjp,1172079566766006272,,,,ये एकलव्य स्कूल आदिवासी बच्चों की पढ़ाई-लिखाई ...,2019-09-12 09:28:39,0,1,2019-09-12 14:58:39,...,0,0,2019-09-12 14:58:39,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000008,0.206692,0.0,374.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-10-12 00:53:50,offersanddeals4,1050467027213676545,,,,💥💥😍skmei analog digital watches @ 91% off star...,2018-10-11 19:23:50,0,0,2018-10-12 00:53:50,...,0,0,2018-10-12 00:53:50,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.000000,0.0,2.0
2018-10-12 13:16:34,aparichit70,1050653943078875136,,,,accept karo challange #gappupappu https://t.co...,2018-10-12 07:46:34,0,0,2018-10-12 13:16:34,...,0,0,2018-10-12 13:16:34,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.833333,0.0,4.0
2018-10-27 16:18:53,aparichit70,1056135642038657024,,,,#gappupappu ki team se @rahulgandhi ji https:/...,2018-10-27 10:48:53,0,0,2018-10-27 16:18:53,...,0,0,2018-10-27 16:18:53,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.833333,0.0,4.0
2018-10-12 13:25:23,aparichit70,1050656160678694913,,,,#gappupappu sudar jao https://t.co/277yypud7q,2018-10-12 07:55:23,0,0,2018-10-12 13:25:23,...,0,0,2018-10-12 13:25:23,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.833333,0.0,4.0


In [16]:
zero_exposure_tweeters.query('indeg == 0')

Unnamed: 0_level_0,username,id,retweet_from,template,retweet_id,text,date,retweets,favorites,adj_date,...,normal_exposure,total_exposure,time,trending_start,inferred_trending_start,hashtag,pr,cc,outdeg,indeg
adj_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-09-25 22:58:46,trueself1950,1176911432983531520,,,,#bjp #haryanaassemblyelection #haryanaelection...,2019-09-25 17:28:46,1,0,2019-09-25 22:58:46,...,0,0,2019-09-25 22:58:46,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.326476,126.0,0.0
2019-09-19 11:11:05,trueself1950,1174559012139831296,,,,#modimadedisaster #modified100 #varanasi #utta...,2019-09-19 05:41:05,0,1,2019-09-19 11:11:05,...,0,0,2019-09-19 11:11:05,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.326476,126.0,0.0
2019-09-18 19:26:31,trueself1950,1174321304826044416,,,,#save2lakhjobs #syeraatrailer #ecigarettes #ec...,2019-09-18 13:56:31,2,0,2019-09-18 19:26:31,...,0,0,2019-09-18 19:26:31,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.326476,126.0,0.0
2019-09-12 11:59:11,trueself1950,1172034400550150146,,,,#haryana #jharkhand #jharkhandwithmodi #mahara...,2019-09-12 06:29:11,1,0,2019-09-12 11:59:11,...,0,0,2019-09-12 11:59:11,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.326476,126.0,0.0
2019-09-16 12:28:00,trueself1950,1173491207055368194,,,,#haryanaelection #haryana #maharashtra #mahara...,2019-09-16 06:58:00,0,0,2019-09-16 12:28:00,...,0,0,2019-09-16 12:28:00,2019-02-17 15:30:00,2019-02-17 14:20:00,jharkhandwithmodi,0.000002,0.326476,126.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-03-09 20:00:03,gumnamhai,1104388883599499264,,,,जब सैफ़ अली खान का बेटा तैमूर अली खान है तो फ़...,2019-03-09 14:30:03,27,31,2019-03-09 20:00:03,...,0,0,2019-03-09 20:00:03,2019-03-09 15:30:00,2019-03-09 15:30:00,modifiedjobs,0.000002,0.266667,6.0,0.0
2019-01-04 21:14:26,gargmanishindia,1081214780487159808,,,,#rafaledramaflops #gappupappu made a big mocke...,2019-01-04 15:44:26,0,0,2019-01-04 21:14:26,...,0,0,2019-01-04 21:14:26,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.711111,10.0,0.0
2019-01-02 23:14:20,gargmanishindia,1080520180462223360,,,,dear #rahulgandhi #gappupappu #mahagathbandhan...,2019-01-02 17:44:20,1,0,2019-01-02 23:14:20,...,0,0,2019-01-02 23:14:20,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.711111,10.0,0.0
2019-01-02 23:41:25,gargmanishindia,1080526994855682048,,,,"dear mam @smitaprakash, why are #secularmedia ...",2019-01-02 18:11:25,0,0,2019-01-02 23:41:25,...,0,0,2019-01-02 23:41:25,2018-10-11 23:30:00,2018-10-11 23:30:00,gappupappu,0.000002,0.711111,10.0,0.0


In [24]:
zero_exposure_tweeters.query('outdeg == 0 and indeg == 0')

username                                                        vishalcincmp
id                                                       1195479544301412352
retweet_from                                                                
template                                                                    
retweet_id                                                                  
text                       😡waiting for my first subscriber😡 -&gt;https:/...
date                                                     2019-11-15 23:11:48
retweets                                                                 126
favorites                                                                786
adj_date                                                 2019-11-16 04:41:48
type                                                                 regular
follower_data                                                           True
template_exposure                                                          0

In [34]:
zero_exposure_tweeters.query('outdeg == 0 and indeg == 0').join(user_df, on='username', rsuffix='_userdf').sort_values('followers')

Unnamed: 0_level_0,username,id,retweet_from,template,retweet_id,text,date,retweets,favorites,adj_date,...,id_userdf,location,description,followers,friends,statuses,type_userdf,new_description,temp,group
adj_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-05-10 19:16:15,rabta25,1126845912460840960,,,,@satyasanatanind @opindia_com @aktkadmin @zee5...,2019-05-10 13:46:15,0,0,2019-05-10 19:16:15,...,893853582134358016,,,1,4,1632,6_non-participant,,non-participant,0
2019-07-12 12:02:57,rabta25,1149567300502081537,,,,@rohini_sgh @boogiepest @one_by_two @theprinti...,2019-07-12 06:32:57,0,0,2019-07-12 12:02:57,...,893853582134358016,,,1,4,1632,6_non-participant,,non-participant,0
2019-06-20 19:37:56,rabta25,1141709270473330690,,,,@aktkadmin @zee5telugu @bjpsamvad @bjp4india @...,2019-06-20 14:07:56,0,0,2019-06-20 19:37:56,...,893853582134358016,,,1,4,1632,6_non-participant,,non-participant,0
2019-05-03 21:33:34,rabta25,1124343752892358656,,,,@tv1telugu @bengalurufc @blrcitypolice @incind...,2019-05-03 16:03:34,0,0,2019-05-03 21:33:34,...,893853582134358016,,,1,4,1632,6_non-participant,,non-participant,0
2019-05-02 18:33:10,rabta25,1123935964655702020,,,,@incindia @jaitdp @satyasanatanind @opindia_co...,2019-05-02 13:03:10,0,0,2019-05-02 18:33:10,...,893853582134358016,,,1,4,1632,6_non-participant,,non-participant,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-03-09 19:19:40,astradiscover,1104378720956137472,,,,😀waiting for my first subscriber😀 -&gt;https:/...,2019-03-09 13:49:40,1,1,2019-03-09 19:19:40,...,,,,,,,6_non-participant,,non-participant,0
2019-03-09 19:21:00,astradiscover,1104379058492706816,,,,😀waiting for my first subscriber😀 -&gt;https:/...,2019-03-09 13:51:00,1,1,2019-03-09 19:21:00,...,,,,,,,6_non-participant,,non-participant,0
2019-03-09 20:25:46,astradiscover,1104395355846836227,,,,😀waiting for my first subscriber😀 -&gt;https:/...,2019-03-09 14:55:46,1,1,2019-03-09 20:25:46,...,,,,,,,6_non-participant,,non-participant,0
2019-03-09 20:16:48,astradiscover,1104393100909268992,,,,😀waiting for my first subscriber -&gt;https://...,2019-03-09 14:46:48,1,1,2019-03-09 20:16:48,...,,,,,,,6_non-participant,,non-participant,0


In [None]:
namedtuple

def label_tweet(text):
    
    if 'https://t.co' in text:
        print('link')
    
    if  

In [35]:
def print_tweets(df):
    for time, row in df.iterrows():
        print(f'{row.text}\n--{row.username}, {row.followers} followers, {row.friends} friends\n')

In [36]:
print_tweets(zero_exposure_tweeters.query('outdeg == 0 and indeg == 0').\
             join(user_df, on='username', rsuffix='_userdf').sort_values('followers', ascending=False))

https://t.co/bugpyahjqp #5aralıkdünyakadınhaklarıgünü #masterchefbr #felizmiércoles #wednesdaywisdom #goknightsgo #theflash #maari2trailer #uritrailer #urisurgicalstrike #म्हारा_वोट_भाजपा_को #gsat11
--lamb_ruthann, 208 followers, 1399 friends

--lamb_ruthann, 208 followers, 1399 friends

https://t.co/eptu6f7sim #5aralıkdünyakadınhaklarıgünü #masterchefbr #felizmiércoles #wednesdaywisdom #goknightsgo #theflash #maari2trailer #uritrailer #urisurgicalstrike #म्हारा_वोट_भाजपा_को #gsat11
--lamb_ruthann, 208 followers, 1399 friends

#5aralıkdünyakadınhaklarıgünü #masterchefbr #felizmiércoles #wednesdaywisdom #goknightsgo #theflash #maari2trailer #uritrailer #urisurgicalstrike #म्हारा_वोट_भाजपा_को #gsat11 https://t.co/mwqv4s35kv
--lamb_ruthann, 208 followers, 1399 friends

https://t.co/uxcsm9ybje #5aralıkdünyakadınhaklarıgünü #masterchefbr #felizmiércoles #wednesdaywisdom #goknightsgo #theflash #maari2trailer #uritrailer #urisurgicalstrike #म्हारा_वोट_भाजपा_को #gsat11
--lamb_ruthann, 208 foll

In [28]:
print_tweets(zero_exposure_tweeters.query('outdeg == 0 and indeg == 0'))

make this video most disliked video please --&gt;https://t.co/nzsdetcsda&lt;-- #pulwamaterrorattacks #successofmakeinindia #exposedeshdrohis #timeforrevenge geelani yasin malik abdul ghani bhat mirwaiz umar farooq #jharkhandwithmodi ck khanna
--astradiscover

#merapmmeraabhimaan "india can’t work from the centre, says raghuram rajan" "india works from every home, frm every kitchen, frm every field, frm every shed, frm every factory, frm every office, frm every school, frm every hospital, and we are all inspired by our pm."
--nobeatinground1

शरद यादव ने कल बोफ़ोर्स ग़लती से नहीं बल्कि जानबूझकर कहा था क्यूँकि बुड्ढा हमारे राहुल बाबा से जलता है ओर उनकी जगह मायावती को प्रधानमंत्री बनाना चाहता है 😂😂😂 #merapmmeraabhimaan #meraboothsabsemazboot #namoagain2019 #modi5yearchallenge
--memerhoonbsdk

#merapmmeraabhimaan for the first time we have a pm whose family has not made politics a business. every single member of his family is living an ordinary indian citizen's life. if india has to progr

In [32]:
zero_exposure_tweeters.join(user_df, on='username', rsuffix='_userdf')

Unnamed: 0_level_0,username,id,retweet_from,template,retweet_id,text,date,retweets,favorites,adj_date,...,id_userdf,location,description,followers,friends,statuses,type_userdf,new_description,temp,group
adj_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-02-17 17:39:00,sanjeevkr868,1097105854694752272,,,,pm मोदी ने रविवार को हजारीबाग में झारखंड को कई...,2019-02-17 12:09:00,0,0,2019-02-17 17:39:00,...,2441946972,,"समर शेष है, नही पाप का भागी केवल व्याध, जो त...",2442,3526,34652,4_other,"समर शेष है, नही पाप का भागी केवल व्याध, जो त...",समर शेष है नही पाप का भागी केवल व्याध जो तटस्...,1
2019-09-11 23:06:06,bjp4india,1171839848732340224,,,,प्रधानमंत्री श्री @narendramodi के 12 सितम्बर ...,2019-09-11 17:36:06,292,1594,2019-09-11 23:06:06,...,207809313,"6-a, deen dayal upadhyay marg,",official twitter account of the bharatiya jana...,12102221,3,193020,6_non-participant,,non-participant,0
2019-09-12 14:13:38,bjp4india,1172068238961692672,,,,आज यहां आदिवासी बच्चों की शिक्षा और उनके कौशल ...,2019-09-12 08:43:38,133,487,2019-09-12 14:13:38,...,207809313,"6-a, deen dayal upadhyay marg,",official twitter account of the bharatiya jana...,12102221,3,193020,6_non-participant,,non-participant,0
2019-09-12 15:13:56,bjp4india,1172083411713835008,,,,मैंने कहा था कि नई सरकार बनते ही पीएम किसान सम...,2019-09-12 09:43:56,550,3004,2019-09-12 15:13:56,...,207809313,"6-a, deen dayal upadhyay marg,",official twitter account of the bharatiya jana...,12102221,3,193020,6_non-participant,,non-participant,0
2019-02-17 15:55:00,bjp4india,1097079676378312704,,,,3 वर्ष पहले झारखण्ड में केवल 3 मेडिकल कॉलेज थे...,2019-02-17 10:25:00,699,2102,2019-02-17 15:55:00,...,207809313,"6-a, deen dayal upadhyay marg,",official twitter account of the bharatiya jana...,12102221,3,193020,6_non-participant,,non-participant,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-10-12 15:54:40,sourabhtamesh,1050693726907977729,,,,@rahulgandhi #gappupappu https://t.co/ximdwjkjzo,2018-10-12 10:24:40,0,0,2018-10-12 15:54:40,...,802408362239520768,ganj basoda,"विद्यार्थी, जिज्ञासु, आत्मविश्वासी, गर्वित भार...",119,150,1861,6_non-participant,,non-participant,0
2018-10-17 18:44:43,aparichit70,1052548460736651264,,,,#gappupappu https://t.co/q9h58e3smq,2018-10-17 13:14:43,0,0,2018-10-17 18:44:43,...,1048465504967516160,"basoda, india","😃indian😃, fan apne @narendramodi ka",23,199,1182,6_non-participant,,non-participant,0
2018-10-12 06:44:39,ljitu5183,1050555310841241600,,,,#navratri अवश्य जानिए कि दुर्गा जी सब बातों को...,2018-10-12 01:14:39,0,0,2018-10-12 06:44:39,...,,,,,,,6_non-participant,,non-participant,0
2018-10-12 14:16:00,ravita4ever,1050669029860421632,,,,"मेरा pm चोर हैं, गरीबो को लुट रहा हैं, अगर इमा...",2018-10-12 08:46:00,14,20,2018-10-12 14:16:00,...,3590393133,इंडिया,"न मै कोई पत्रकार हूँ और न ही कोई लेखक, हाँ जिं...",8139,78,25739,6_non-participant,,non-participant,0
