## Import Libraries 

In [2]:
! pip install networkx
! pip install plotly
! pip install colorlover
!pip install stop-words
import nltk
nltk.download('stopwords') 
import networkx as nx
import pandas as pd
from collections import Counter
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import plotly.graph_objects as go
import random
import colorlover as cl
from IPython.display import HTML
import plotly
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from stop_words import get_stop_words
import re
from sklearn.preprocessing import MinMaxScaler

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Load Data

In [0]:
df = pd.read_csv("tweets2009-06-0115.csv.zip", sep='\t', compression='zip')

In [23]:
print("Num of rows:", df.shape[0])

Num of rows: 3437690


In [24]:
df.head()

Unnamed: 0,date,user,tweet
0,2009-06-01 21:43:59,burtonator,No Post Title
1,2009-06-01 21:47:23,burtonator,No Post Title
2,2009-06-02 01:15:44,burtonator,No Post Title
3,2009-06-02 05:17:52,burtonator,No Post Title
4,2009-06-02 23:58:25,burtonator,No Post Title


## Q2. Build a Mention Graph

### Q2 (a)

In [0]:
greenTag = df[df["tweet"].str.lower().str.contains("#green", na=False)].copy()

In [0]:
def addMentionedColumn(df):
    
    def mentionsList(txt):
        allWords = [word.strip(""" ,.:'\";""").lower() for word in txt.split()]
        allNames = [word.strip("@") for word in allWords if word.startswith("@")]
        uniqueNames = list(set(allNames))
        return allNames
    
    df["mentioned"] = df["tweet"].apply(mentionsList)

In [27]:
addMentionedColumn(greenTag)
greenTag.head(10)

Unnamed: 0,date,user,tweet,mentioned
2811,2009-06-11 17:02:05,maxgladwell,"RT @sustainablog: [susblog] #green Food, Inc. ...",[sustainablog]
9042,2009-06-11 17:11:30,greennewstweets,us news feed http://us.newstweeters/com/feed/ ...,[]
11490,2009-06-11 17:13:50,sea4ever,@ecopond Top 10 Eco-Travel Mistakes- What envi...,[ecopond]
13598,2009-06-11 17:16:40,gridwise,Pew Finds Clean Energy Economy Generates Signi...,[]
13644,2009-06-11 17:16:41,novacharge,Pew Finds Clean Energy Economy Generates Signi...,[]
15295,2009-06-11 17:18:46,absolutelytrue,"RT @greenbiztweets: How many ""Green Jobs"" real...","[greenbiztweets, marcgunther]"
16680,2009-06-11 17:21:05,accion_usa,RT @absolutelytrue @greenbiztweets How many Gr...,"[absolutelytrue, greenbiztweets]"
19027,2009-06-11 17:23:45,dictate,Lost a small item? This AltUse trick is a grea...,[]
40110,2009-06-11 17:59:18,ram0na,On the way to Boston to see the Sox/Yanks! #re...,[]
41545,2009-06-11 18:01:16,peterclayton,#Green - Excellent Book - Energy Shift: Game-C...,[]


In [0]:
def mentionGraph(df):
    g = nx.Graph()
    
    for (index, date, user, tweet, mentionedUsers) in df.itertuples():
        for mentionedUser in mentionedUsers:
            if (user in g) and (mentionedUser in g[user]):
                g[user][mentionedUser]["numberMentions"] += 1
            else:
                g.add_edge(user, mentionedUser, numberMentions=1)
    
    return g

In [29]:
greenGraph = mentionGraph(greenTag)
print("# nodes:", len(greenGraph.nodes()))
print("# edges:", len(greenGraph.edges()))

# nodes: 740
# edges: 752


Number of Nodes: 740 <br>
Number of Edges: 752

### Q2 (b)

In [30]:
degree_sequence = sorted([d for n, d in greenGraph.degree()], reverse=True)  # degree sequence
layout = Layout(showlegend=False, title="Degree Distribution", xaxis_title="Degree", yaxis_title="Frequency")
fig = Figure(data=[go.Histogram(x=degree_sequence)], layout=layout)
iplot(fig, show_link=False)

In [31]:
print("User with the highest degree is:",[n for n, d in greenGraph.degree() if d==60][0])

User with the highest degree is: rmack


**Answer 2(b)**: <br>


*   The degree distribtion is positively skewed i.e. more than half of the users/nodes have a degree of 1
*   A degree of 1 suggests that either the given user (say A) is mentioning only one user or only one user is mentioning a given user (A)
*  There is a presence of an outlier node/user becuase it's the only user with a degree of 60. The user was obtained as 'rmack' as shown above.
*   The degree of 60 suggests that either majority of the users are mentioning this user ('rmack') or this user('rmack') is mentioning majority of the other different users 



### Q2 (c)

In [0]:
def getNtopEdges(graph, n):
 return sorted(greenGraph.edges(data=True),key= lambda x: x[2]['numberMentions'],reverse=True)[0:n]
 #https://stackoverflow.com/questions/52440518/finding-maximum-weighted-edge-in-a-networkx-graph-in-python

In [33]:
getNtopEdges(greenGraph,5)

[('delenn88', 'jessicavickery', {'numberMentions': 5}),
 ('rtmeme', 'chowjackie', {'numberMentions': 4}),
 ('mommyperks', 'recyclemycle', {'numberMentions': 3}),
 ('rmack', 'mranti', {'numberMentions': 3}),
 ('rmack', 'popoever', {'numberMentions': 3})]

### Q2 (d)

In [0]:
def getgreenText(df):
    return (' '.join(row['tweet'] for index, row in df.iterrows()))

In [0]:
def getGreenUserTweets(df):
  return df.groupby('user').apply(getgreenText).to_frame(name='tweet').reset_index()

In [0]:
def getTopNwordsUser(Text,n):
  greenText=re.sub(r'\d+', '',Text)
  tokenizer = RegexpTokenizer(r'\w+')
  greenTokens= tokenizer.tokenize(greenText)
  stop_words = list(get_stop_words('english'))
  nltk_words = list(stopwords.words('english')) #About 150 stopwords
  stop_words.extend(nltk_words)
  stop_words.extend(['green', 'http', 'com', 'www'])
  greenTokens = [w.lower() for w in greenTokens if not w.lower() in stop_words]
  word_counter = Counter()
  word_counter.update(greenTokens)
  return list(dict(word_counter.most_common(n)).keys())

In [0]:
def getTop3wordsUsers(df):
  greenUserTweets=getGreenUserTweets(df)
  top3wordsUsers=[]
  for index, row in greenUserTweets.iterrows():
    top3wordsUsers.append(getTopNwordsUser(row['tweet'],3))
  return dict(zip(list(greenUserTweets.user.values), top3wordsUsers))

In [0]:
top3wordsUsers=getTop3wordsUsers(greenTag)

In [0]:
def addRandomPositions(graph):
    posDict = dict((node,(random.gauss(0,10),random.gauss(0,10))) for node in graph.nodes())
    nx.set_node_attributes(graph, name="pos", values=posDict)

In [0]:
addRandomPositions(greenGraph)

In [51]:
# map purd color scale to 300 cells
bugn = cl.scales['9']['seq']['BuGn']
bugn300 = cl.interp(bugn, 300)
HTML(cl.to_html(bugn300))

In [73]:
# map purd color scale to 300 cells
purd = cl.scales['9']['seq']['PuRd']
purd300 = cl.interp(purd, 300)
HTML(cl.to_html(purd300))

In [0]:
def plotNetworkSizeEdgeColor(graph,centrality):
    if centrality=='degree':
        Cent=nx.degree_centrality(greenGraph)
    elif centrality=='betweeness':
        Cent=nx.betweenness_centrality(greenGraph)
    elif centrality=='pagerank':
        Cent=nx.pagerank(greenGraph)
    else:
        if centrality=='closeness centrality':
            Cent=nx.closeness_centrality(greenGraph)

    maxCentr = max(Cent.values())
    minCentr = min(Cent.values())
    edgewidths=[graph[node1][node2]['numberMentions'] for (node1, node2) in graph.edges()]

    scatters=[]

    for (node1, node2) in graph.edges():
        x0, y0 = graph.nodes[node1]['pos']
        x1, y1 = graph.nodes[node2]['pos']
        edgeWidth = graph[node1][node2]['numberMentions']
        edgeColor=int(299*((edgeWidth-min(edgewidths))/(max(edgewidths)-min(edgewidths))))
        s = Scatter(
            x=[x0, x1],
            y=[y0, y1],
            hoverinfo='none',
            mode='lines', 
            line=scatter.Line(width=edgeWidth ,color=bugn300[edgeColor]))
        scatters.append(s)

    for node in graph.nodes():
        nodeCentr = Cent[node]
        nodeColor = int(299*(nodeCentr-minCentr)/(maxCentr-minCentr))
        xPos, yPos = graph.nodes[node]['pos']
        if node in greenTag.user.unique():
          s = Scatter(
                  x=[xPos], 
                  y=[yPos], 
                  text="User: {0}\n, Top 3: {1}, {2}:{3}".format(node, top3wordsUsers[node],centrality,nodeCentr),
                  hoverinfo='text',
                  mode='markers', 
                  marker=dict(
                      color=purd300[nodeColor], 
                      size=nx.degree(graph,node)*2,                       #size=nx.degree(graph,node)*2,        
                      line=dict(width=2)))
        else:
          s = Scatter(
                  x=[xPos], 
                  y=[yPos], 
                  text="User: {0}\nTop 3: None".format(node),
                  hoverinfo='text',
                  mode='markers', 
                  marker=dict(
                      color=purd300[nodeColor], 
                      size=nx.degree(graph,node)*2,         
                      line=dict(width=2)))

        scatters.append(s)
    
    layout = Layout(plot_bgcolor='rgba(50,50,50,50)', showlegend=False)
    fig = Figure(data=scatters, layout=layout)
    iplot(fig, show_link=False)

In [75]:
# configure_plotly_browser_state()
plotNetworkSizeEdgeColor(greenGraph, 'betweeness')

## Q3. Content Analysis

### Q3 (a)

In [0]:
def getTopNwords(df,n):
  greenText=getgreenText(greenTag)
  greenText=re.sub(r'\d+', '',greenText)
  tokenizer = RegexpTokenizer(r'\w+')
  greenTokens= tokenizer.tokenize(greenText)
  stop_words = list(get_stop_words('english'))
  nltk_words = list(stopwords.words('english')) #About 150 stopwords
  stop_words.extend(nltk_words)
  stop_words.extend(['green', 'http', 'com', 'www', 'rt'])
  greenTokens = [w.lower() for w in greenTokens if not w.lower() in stop_words]
  word_counter = Counter()
  word_counter.update(greenTokens)
  return word_counter.most_common(n)

In [78]:
getTopNwords(greenTag,30)

[('ly', 358),
 ('bit', 324),
 ('greendam', 198),
 ('tinyurl', 106),
 ('news', 92),
 ('eco', 80),
 ('iran', 66),
 ('greenpeace', 59),
 ('rmack', 57),
 ('iranelection', 57),
 ('digg', 54),
 ('solar', 52),
 ('china', 50),
 ('twitter', 49),
 ('please', 49),
 ('wear', 46),
 ('greenscream', 46),
 ('world', 44),
 ('energy', 41),
 ('tomorrow', 41),
 ('us', 39),
 ('gd', 36),
 ('freedom', 36),
 ('tcot', 34),
 ('censorware', 34),
 ('p', 33),
 ('via', 33),
 ('sustainable', 32),
 ('retweet', 32),
 ('environment', 31)]

**Answer 3(a)**: <br>

*   The presence of the most common words such as the 'sustainable', 'environment', 'greenpeace', 'solar', 'eco' etc. suggests that the general theme of the revolves around **sustainable developement** using renewable sources of energy to improve the environement

*   However, that's not the only theme! Looking at the words such as the 'greendam' intrigued my curiosity to explore what was it! On searching the web, I realized that back in the year 2009 Chinese government imposed the schools to install **'greendam'- a software meant for censoring explicit contents and protect the children** [http://www.circleid.com/posts/20090608_chinas_green_dam_youth_escort_software/]. Thus, there was this huge arguement of "civil rights v/s protecting the children". Following this there was a book "**The Worldwide Struggle for Internet Freedom**" by Rebecca MacKinnon talking about internet freedom. The author of this book is '@rmack' who is being mentioned by all the users. This story connects with the fact that @rmack stood out and was a outlier with the highest degree of 60. Also, this story justifies the prescence of the chinese tweets





### Q3 (b)

In [81]:
# map purd color scale to 300 cells
purd = cl.scales['9']['seq']['PuRd']
purd300 = cl.interp(purd, 300)
HTML(cl.to_html(purd300))

In [0]:
def plotNetworkSizeColor(graph,centrality):
    if centrality=='degree':
        Cent=nx.degree_centrality(graph)
    elif centrality=='betweeness':
        Cent=nx.betweenness_centrality(graph)
    elif centrality=='pagerank':
        Cent=nx.pagerank(graph)
    else:
        if centrality=='closeness centrality':
            Cent=nx.closeness_centrality(graph)

    maxCentr = max(Cent.values())
    minCentr = min(Cent.values())
    
    scatters=[]

    for (node1, node2) in graph.edges():
        x0, y0 = graph.nodes[node1]['pos']
        x1, y1 = graph.nodes[node2]['pos']
        edgeWidth = graph[node1][node2]['numberMentions']
        s = Scatter(
                x=[x0, x1],
                y=[y0, y1],
                hoverinfo='none',
                mode='lines', 
                line=scatter.Line(width=edgeWidth ,color='#888'))
        scatters.append(s)

    for node in graph.nodes():
        nodeCentr = Cent[node]
        nodeColor = int(299*(nodeCentr-minCentr)/(maxCentr-minCentr))
        xPos, yPos = graph.nodes[node]['pos']
        if node in greenTag.user.unique():
          s = Scatter(
                  x=[xPos], 
                  y=[yPos], 
                  text="User: {0}\n, Top 3: {1}, {2}:{3}".format(node, top3wordsUsers[node],centrality,nodeCentr),
                  hoverinfo='text',
                  mode='markers', 
                  marker=dict(
                      color=purd300[nodeColor], 
                      size=nx.degree(graph,node)*2,         
                      line=dict(width=2)))
        else:
          s = Scatter(
                  x=[xPos], 
                  y=[yPos], 
                  text="User: {0}\nTop 3: None".format(node),
                  hoverinfo='text',
                  mode='markers', 
                  marker=dict(
                      color=purd300[nodeColor], 
                      size=nx.degree(graph,node)*2,         
                      line=dict(width=2)))

        scatters.append(s)
    
    layout = Layout(showlegend=False)
    fig = Figure(data=scatters, layout=layout)
    iplot(fig, show_link=False)

In [83]:
plotNetworkSizeColor(greenGraph, 'closeness centrality')

## Q4. Centrality Analysis

### Q4 (a)

In [84]:
degreeCentrality=nx.degree_centrality(greenGraph)
print(degreeCentrality)

{'maxgladwell': 0.009472259810554804, 'sustainablog': 0.0027063599458728013, 'sea4ever': 0.0013531799729364006, 'ecopond': 0.012178619756427606, 'absolutelytrue': 0.005412719891745603, 'greenbiztweets': 0.012178619756427606, 'marcgunther': 0.0040595399188092015, 'accion_usa': 0.0027063599458728013, 'urbandecorsue': 0.0013531799729364006, 'tkpleslie': 0.0013531799729364006, 'drmeyer1': 0.0013531799729364006, '': 0.006765899864682004, 'mmaine2008': 0.0040595399188092015, 'msnmoneystories': 0.0013531799729364006, 'lyleses07': 0.0013531799729364006, 'lavsage': 0.0013531799729364006, 'globalalert': 0.0027063599458728013, 'twilightearth': 0.0013531799729364006, 'libbypatterson': 0.0013531799729364006, 'cristalanngee': 0.0013531799729364006, 'blondishnet': 0.0013531799729364006, 'h2ohio': 0.0013531799729364006, 'travelinggreen': 0.010825439783491205, 'viaarchitecture': 0.0027063599458728013, 'treehugger': 0.0027063599458728013, 'marydavidge': 0.0027063599458728013, 'alicesw': 0.00135317997293

In [85]:
betweennessCentrality=nx.betweenness_centrality(greenGraph)
print(betweennessCentrality)

{'maxgladwell': 0.0019124210186621487, 'sustainablog': 0.0016685552511817405, 'sea4ever': 0.0, 'ecopond': 0.0012174952602029402, 'absolutelytrue': 6.173043236972734e-05, 'greenbiztweets': 0.0017712355743313862, 'marcgunther': 5.5007315973024414e-06, 'accion_usa': 0.0, 'urbandecorsue': 0.0, 'tkpleslie': 0.0, 'drmeyer1': 0.0, '': 4.5839429977520344e-05, 'mmaine2008': 0.00012835040393705696, 'msnmoneystories': 0.0, 'lyleses07': 0.0, 'lavsage': 0.0, 'globalalert': 0.00016502194791907324, 'twilightearth': 0.0, 'libbypatterson': 0.0, 'cristalanngee': 0.0, 'blondishnet': 0.0, 'h2ohio': 0.0, 'travelinggreen': 0.0013421785097417957, 'viaarchitecture': 0.0004950658437572197, 'treehugger': 0.0006454191740834864, 'marydavidge': 0.00033737820463454975, 'alicesw': 0.0, 'supplychainnewz)': 0.0, 'meachuta': 0.0, 'prisbrasil': 0.0, 'mommyperks': 0.0, 'recyclemycle': 0.0, 'petticoatpirate': 0.0, 'hsec': 0.0, 'psiphoninc': 0.0005390716965356392, 'jukhau': 0.0, '3rdwhale': 0.0, 'greenmob': 0.0010158017683

In [86]:
pageRank=nx.pagerank(greenGraph)
print(pageRank)

{'maxgladwell': 0.0029235523189443226, 'sustainablog': 0.0010263412265810552, 'sea4ever': 0.0007651060789399665, 'ecopond': 0.005935350177916117, 'absolutelytrue': 0.0018950119508296644, 'greenbiztweets': 0.004335762178206623, 'marcgunther': 0.0014828521148084785, 'accion_usa': 0.0010148940793433699, 'urbandecorsue': 0.0013513513513513514, 'tkpleslie': 0.0013513513513513514, 'drmeyer1': 0.0007553014724482137, '': 0.003239046333816175, 'mmaine2008': 0.001998793187759044, 'msnmoneystories': 0.000769093190963964, 'lyleses07': 0.000769093190963964, 'lavsage': 0.0007767901972929965, 'globalalert': 0.001351019099249859, 'twilightearth': 0.0013513513513513514, 'libbypatterson': 0.0013513513513513514, 'cristalanngee': 0.0013513513513513514, 'blondishnet': 0.0013513513513513514, 'h2ohio': 0.000750922081264735, 'travelinggreen': 0.0051506012124904535, 'viaarchitecture': 0.0013611088264540946, 'treehugger': 0.0012903610280132474, 'marydavidge': 0.001438125029516439, 'alicesw': 0.00135135135135135

In [87]:
closenessCentrality=nx.closeness_centrality(greenGraph)
print(closenessCentrality)

{'maxgladwell': 0.015434290384383499, 'sustainablog': 0.0139808370298003, 'sea4ever': 0.01250361931324639, 'ecopond': 0.015561569688768605, 'absolutelytrue': 0.010787981514344175, 'greenbiztweets': 0.012673685600184824, 'marcgunther': 0.009390742944715261, 'accion_usa': 0.010713837311496449, 'urbandecorsue': 0.0013531799729364006, 'tkpleslie': 0.0013531799729364006, 'drmeyer1': 0.0040595399188092015, '': 0.0069592112893872024, 'mmaine2008': 0.007515353388154471, 'msnmoneystories': 0.0058855177136149465, 'lyleses07': 0.0058855177136149465, 'lavsage': 0.014534664074788952, 'globalalert': 0.018837689623246206, 'twilightearth': 0.0013531799729364006, 'libbypatterson': 0.0013531799729364006, 'cristalanngee': 0.0013531799729364006, 'blondishnet': 0.0013531799729364006, 'h2ohio': 0.01403592560163443, 'travelinggreen': 0.01800835737568191, 'viaarchitecture': 0.010188649207991722, 'treehugger': 0.011765006255265912, 'marydavidge': 0.008933314205287871, 'alicesw': 0.0013531799729364006, 'supplyc

### Q4 (b)

In [88]:
plotNetworkSizeColor(greenGraph, 'degree')

In [64]:
plotNetworkSizeColor(greenGraph, 'betweeness')

In [65]:
plotNetworkSizeColor(greenGraph, 'pagerank')

### Q4 (c)

In [66]:
pd.DataFrame(degreeCentrality.items(), columns=['User', 'DegreeCentrality']).sort_values(by='DegreeCentrality', ascending = False)

Unnamed: 0,User,DegreeCentrality
45,rmack,0.081191
74,isaac,0.027064
271,ecosaveology,0.021651
305,xiefang,0.020298
505,dupola,0.020298
...,...,...
416,greenit,0.001353
415,glfceo,0.001353
414,solaroy,0.001353
413,wholesolar,0.001353


In [67]:
pd.DataFrame(betweennessCentrality.items(), columns=['User', 'betweennessCentrality']).sort_values(by='betweennessCentrality', ascending = False)

Unnamed: 0,User,betweennessCentrality
45,rmack,0.030606
74,isaac,0.010197
505,dupola,0.007160
86,popoever,0.005986
190,sirsteven,0.004133
...,...,...
388,agrinewshound,0.000000
126,ralphtresvant,0.000000
386,feedingtheglobe,0.000000
385,brainchildassoc,0.000000


In [68]:
pd.DataFrame(pageRank.items(), columns=['User', 'pageRank']).sort_values(by='pageRank', ascending = False)

Unnamed: 0,User,pageRank
45,rmack,0.023841
271,ecosaveology,0.008610
74,isaac,0.008428
122,facetweet,0.006753
3,ecopond,0.005935
...,...,...
532,xumingyuan,0.000505
527,mcliu,0.000505
526,terryxxy,0.000505
586,cozboo,0.000505


**Answer 4(c)**: <br>

Centrality | Key Players 
--- | --- 
Betweeness Centrality | rmack, isaac, dupola, popoever, sirsteven
Page Rank | rmack, ecosaveology, isaac, facetweet, soninhafrancine
Degree Centrality | rmack,isaac, ecosaveology, xiefang, dupola  

1. The results are different especially for the Page Rank and Betweeness Centrality. In terms of the context of the theme Betweeness Centrality and Degree Centrality gave similar results.** The main cause of the difference is that the betweeness centrality detects community while page rank gives weight to the authority**. Betweenness centrality of a node v is the sum of the fraction of all-pairs shortest paths that pass through v[https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html#networkx.algorithms.centrality.betweenness_centrality]
2. Betweeness produces meaningful intrepretation because it was able to detect the community talking about the 'greeedam' problem in China back in 2009. Moreover, all the keyplayers in this community were high rated and thus appeared in top 5 players. On the other hand, Page Rank wasn't successful in detecting community because it ranked the nodes with highest authority thus gave key players who were posting about different topics altogether and were not related. 



## Q5. Connectivity Patterns

### Q5 (a) Cliques

#### Q5 (a)(1) Number of maximal cliques

In [69]:
print("Number of maximal cliques:",nx.graph_number_of_cliques(greenGraph))

Number of maximal cliques: 588


#### Q5 (a)(2) Graph's clique number

In [70]:
print("Size of the largest clique:",nx.graph_clique_number(greenGraph))

Size of the largest clique: 5


#### Q5 (a)(3) Number of maximal cliques for each node

In [71]:
print("Number of maximal cliques for each node:",nx.number_of_cliques(greenGraph))

Number of maximal cliques for each node: {'maxgladwell': 4, 'sustainablog': 2, 'sea4ever': 1, 'ecopond': 9, 'absolutelytrue': 3, 'greenbiztweets': 8, 'marcgunther': 3, 'accion_usa': 1, 'urbandecorsue': 1, 'tkpleslie': 1, 'drmeyer1': 1, '': 5, 'mmaine2008': 3, 'msnmoneystories': 1, 'lyleses07': 1, 'lavsage': 1, 'globalalert': 2, 'twilightearth': 1, 'libbypatterson': 1, 'cristalanngee': 1, 'blondishnet': 1, 'h2ohio': 1, 'travelinggreen': 8, 'viaarchitecture': 2, 'treehugger': 2, 'marydavidge': 2, 'alicesw': 1, 'supplychainnewz)': 1, 'meachuta': 1, 'prisbrasil': 1, 'mommyperks': 1, 'recyclemycle': 1, 'petticoatpirate': 1, 'hsec': 1, 'psiphoninc': 2, 'jukhau': 1, '3rdwhale': 1, 'greenmob': 4, 'luso_shanghai': 1, 'chelseagreen': 3, 'gwconfidence': 1, 'cobrahead': 1, 'tavdb': 1, 'yourdailythread': 1, 'aaronposehn': 1, 'rmack': 52, 'jabolins': 1, 'scottsdaleaz': 1, 'azbigmedia': 1, 'cr8tivecitizen': 4, 'greendig': 1, 'candita': 1, 'molfamily': 1, 'feng37': 3, 'ranyunfei': 4, 'stephhicks': 4, 

#### Q5 (a)(4) Size of the largest maximal clique containing each given node

In [72]:
print("Size of the largest maximal clique containing each given node:",nx.node_clique_number(greenGraph))

Size of the largest maximal clique containing each given node: {'maxgladwell': 3, 'sustainablog': 2, 'sea4ever': 2, 'ecopond': 2, 'absolutelytrue': 3, 'greenbiztweets': 3, 'marcgunther': 2, 'accion_usa': 3, 'urbandecorsue': 2, 'tkpleslie': 2, 'drmeyer1': 2, '': 2, 'mmaine2008': 2, 'msnmoneystories': 2, 'lyleses07': 2, 'lavsage': 2, 'globalalert': 2, 'twilightearth': 2, 'libbypatterson': 2, 'cristalanngee': 2, 'blondishnet': 2, 'h2ohio': 2, 'travelinggreen': 2, 'viaarchitecture': 2, 'treehugger': 2, 'marydavidge': 2, 'alicesw': 2, 'supplychainnewz)': 2, 'meachuta': 2, 'prisbrasil': 2, 'mommyperks': 2, 'recyclemycle': 2, 'petticoatpirate': 2, 'hsec': 2, 'psiphoninc': 2, 'jukhau': 2, '3rdwhale': 2, 'greenmob': 2, 'luso_shanghai': 2, 'chelseagreen': 2, 'gwconfidence': 2, 'cobrahead': 2, 'tavdb': 2, 'yourdailythread': 2, 'aaronposehn': 2, 'rmack': 4, 'jabolins': 2, 'scottsdaleaz': 2, 'azbigmedia': 2, 'cr8tivecitizen': 2, 'greendig': 2, 'candita': 2, 'molfamily': 2, 'feng37': 3, 'ranyunfei':

### Q5 (b)

**Answer 5(b)**: <br>
The number of maximal cliques was obtained as 588 suggests that there are in all 588 cliques formed of all the nodes. The size of the largest clique was 5 i.e the larget clique was formed of 5 nodes. Similarily, the cliques for each nodes were tracked and the size of the largest clique for each node was obtained. The cliques aid in detecting the clusters formed by the nodes in the graph. In context of the chosen hastag #green, cliques can aid in determining the users posting about a similar top. In this case, it was observed that there was an highlight of #greendam in the tweets in 2009. So the users posting about the greeedam or its context formed a clique. Similarily, users posting about #green in context of the environmnet formed another cliques. Thus, this can of analysis on cliques helps in finding set of users talking a similar topics/ context. 
