# Table for Pre-War Posts

In [1]:
import json
import pandas as pd
import numpy as np

# Load the pre-war community file: a JSON object whose items are iterated
# later as (community id, authors) pairs.
# The encoding is passed explicitly because JSON text is UTF-8, whereas a bare
# open() decodes with the platform's locale encoding.
with open('communities2023-07-01_2023-10-06.json', encoding='utf-8') as f:
    communities = json.load(f)

# Initialize the table
# NOTE(review): `table` is not read by any cell visible in this notebook;
# confirm whether it is still needed.
table = []

In [2]:
# Converting back to a dataframe
def import_from_json(json_file_path):
    '''
    Import a dictionary of DataFrames from a JSON file.

    The file must contain a JSON object at the top level. Each value of that
    object is passed unchanged to ``pd.DataFrame`` and stored under the same key.

    Parameters:
    json_file_path (str): Path to the input JSON file.

    Returns:
    dict: Dictionary where keys are the JSON object's keys (strings) and
          values are DataFrames.

    Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
    AttributeError: If the top-level JSON value is not an object (no ``.items()``).
    '''

    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which may differ (e.g. on Windows).
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        dict_of_json = json.load(json_file)

    # Convert each JSON-compatible structure back into a DataFrame
    dict_of_dfs = {key: pd.DataFrame(value) for key, value in dict_of_json.items()}

    return dict_of_dfs


# Load the pre-war topic tables; the result is a dict of DataFrames that the
# following cells index by community id.
topics = import_from_json(json_file_path='topic_0701_to_1006.json')

In [3]:
# Build one combined table holding the retained topics of every community.
#
# NOTE: this cell overwrites topics[community] with the filtered frame (later
# cells read it), so re-running it without reloading `topics` recomputes
# '#posts' from the already-filtered rows.
min_topic_share = 1/20  # a topic must hold at least this share of '#posts' to be kept

kept_frames = []
for community, authors in communities.items():
    community_df = topics[community]

    # Adding community number and number of authors to each df
    #community_df["interpretation"] = "?"
    community_df["community"] = community    # community index (a JSON key, i.e. a string)
    community_df["#authors"] = len(authors)  # number of authors in the community
    # Number of posts considered: summed before the outlier topic is dropped,
    # so the outlier topic's posts are included in this total.
    community_df["#posts"] = community_df['Count'].sum()

    # Drop the outlier topic (-1) and topics with less than 1/20 of total posts
    is_real_topic = community_df['Topic'] != -1
    is_large_enough = community_df['Count'] / community_df['#posts'] >= min_topic_share
    topics[community] = community_df[is_real_topic & is_large_enough]

    kept_frames.append(topics[community])

# Stack all frames in a single concat instead of growing total_df inside the
# loop: that copied the accumulated rows on every iteration and relied on
# concatenating with an empty seed DataFrame.
total_df = pd.concat(kept_frames, ignore_index=True) if kept_frames else pd.DataFrame()

In [4]:
# Total number of rows currently held across the frames in `topics`.
# Assumes the keys of `topics` are exactly "0", "1", ..., str(len(topics) - 1).
count = sum(len(topics[str(i)]) for i in range(len(topics)))
count

17

In [5]:
# Order rows by community, then by topic id, and renumber them 0..n-1.
# The community ids come from JSON object keys and are therefore strings, so
# they are compared as integers here; a plain string sort would place "10"
# before "2". `key` is applied to each sort column separately, and 'Topic' is
# passed through unchanged.
total_df = total_df.sort_values(
    by=['community', 'Topic'],
    key=lambda column: column.astype(int) if column.name == 'community' else column,
).reset_index(drop=True)

In [6]:
# Print each row's keyword list next to its row position; the labelling cell
# that follows addresses rows by these positions.
for position, keywords in enumerate(total_df['Representation']):
    print(position, keywords)

0 ['israel', 'have', 'palestinians', 'people', 'state', 'jews', 'country', 'jewish', 'say', 'israeli']
1 ['israel', 'have', 'state', 'jews', 'palestinians', 'people', 'arab', 'palestinian', 'jewish', 'right']
2 ['anti', 'jews', 'jewish', 'zionism', 'religion', 'judaism', 'semitism', 'zionist', 'people', 'have']
3 ['rule', 'post', 'reddit', 'user', 'comment', 'mod', 'content', 'attack', 'remove', 'sub']
4 ['black', 'white', 'americans', 'people', 'reparation', 'native', 'jews', 'indigenous', 'american', 'america']
5 ['jews', 'have', 'israel', 'palestinians', 'palestine', 'people', 'state', 'palestinian', 'country', 'right']
6 ['state', 'israel', 'palestinians', 'palestine', 'palestinian', 'solution', 'want', 'peace', 'have', 'support']
7 ['palestinians', 'jews', 'palestinian', 'palestine', 'disaster', 'say', 'pro', 'see', 'terrorism', 'correct']
8 ['thank', 'remindme', 'point', 'hour', 'leave', 'interesting', 'reply', 'good', 'read', 'sure']
9 ['anti', 'zionism', 'zionist', 'antisemitis

In [7]:
# Hand-assigned interpretation for every row of total_df, in row order:
# the label at position k is written to row k. The positions refer to the
# sorted, re-indexed table, so this list must be revisited whenever the
# filtering or sorting above changes.
interpretations = [
    "State Legitimacy",                            # 0
    "State Legitimacy",                            # 1
    "(Anti) Zionism",                              # 2
    "Reddit Rules",                                # 3
    "Americans, Reparation",                       # 4
    "State Legitimacy",                            # 5 (was misspelled "State Legitimicy")
    "State Legitimacy, Peace",                     # 6
    "Palestinians Jews Terrorism",                 # 7
    "Politeness",                                  # 8
    "(Anti) Zionism",                              # 9
    "Civilian, Target, IDF, Terrorist, Settler",   # 10
    "Israel-Palestine Peace",                      # 11
    "State Legitimacy, Religion",                  # 12
    "Reddit Rules",                                # 13
    "Politeness",                                  # 14
    "State Legitimacy",                            # 15
    "State Legitimacy",                            # 16
]

# The first assignment creates the 'interpretation' column; the rest fill it in.
for row, label in enumerate(interpretations):
    total_df.at[row, 'interpretation'] = label

In [8]:
# Put the identifying columns first, followed by every other column except
# 'Name' and 'Representative_Docs'. The remaining columns are read from
# total_df itself instead of topics[community], which only worked while the
# loop variable `community` was still bound from an earlier cell.
leading_columns = ['community', '#authors', '#posts', "interpretation"]
dropped_columns = ["Name", "Representative_Docs"]
total_df = total_df[
    leading_columns
    + [col for col in total_df.columns if col not in leading_columns + dropped_columns]
]
total_df

Unnamed: 0,community,#authors,#posts,interpretation,Topic,Count,Representation
0,0,6,863,State Legitimacy,0,617,"[israel, have, palestinians, people, state, je..."
1,1,23,1326,State Legitimacy,0,1275,"[israel, have, state, jews, palestinians, peop..."
2,2,32,1540,(Anti) Zionism,0,175,"[anti, jews, jewish, zionism, religion, judais..."
3,2,32,1540,Reddit Rules,1,135,"[rule, post, reddit, user, comment, mod, conte..."
4,2,32,1540,"Americans, Reparation",2,97,"[black, white, americans, people, reparation, ..."
5,2,32,1540,State Legitimicy,3,84,"[jews, have, israel, palestinians, palestine, ..."
6,3,24,882,"State Legitimacy, Peace",0,65,"[state, israel, palestinians, palestine, pales..."
7,3,24,882,Palestinians Jews Terrorism,1,56,"[palestinians, jews, palestinian, palestine, d..."
8,3,24,882,Politeness,2,48,"[thank, remindme, point, hour, leave, interest..."
9,3,24,882,(Anti) Zionism,3,47,"[anti, zionism, zionist, antisemitism, jews, r..."


In [9]:
# Export the pre-war table as LaTeX source, without the row index.
with open('table_0701_1006.tex', 'w') as tex_file:
    latex_source = total_df.to_latex(index=False)
    tex_file.write(latex_source)

# Table for Post-War Posts

In [10]:
# Load the post-war community file: a JSON object whose items are iterated
# later as (community id, authors) pairs. This rebinds `communities`.
# The encoding is passed explicitly because JSON text is UTF-8, whereas a bare
# open() decodes with the platform's locale encoding.
with open('communities2023-10-07_2023-12-31.json', encoding='utf-8') as f:
    communities = json.load(f)

# Initialize the table
# NOTE(review): `table` is not read by any cell visible in this notebook;
# confirm whether it is still needed.
table = []

# Import the post-war topic tables (a dict of DataFrames); this rebinds `topics`.
topics = import_from_json('topic_1007_to_1231.json')

In [11]:
# Build one combined table holding the retained topics of every community.
#
# NOTE: this cell overwrites topics[community] with the filtered frame (later
# cells read it), so re-running it without reloading `topics` recomputes
# '#posts' from the already-filtered rows.
min_topic_share = 1/20  # a topic must hold at least this share of '#posts' to be kept

kept_frames = []
for community, authors in communities.items():
    community_df = topics[community]

    # Adding community number and number of authors to each df
    community_df["community"] = community    # community index (a JSON key, i.e. a string)
    community_df["#authors"] = len(authors)  # number of authors in the community
    # Number of posts considered: summed before the outlier topic is dropped,
    # so the outlier topic's posts are included in this total.
    community_df["#posts"] = community_df['Count'].sum()

    # Drop the outlier topic (-1) and topics with less than 1/20 of total posts
    is_real_topic = community_df['Topic'] != -1
    is_large_enough = community_df['Count'] / community_df['#posts'] >= min_topic_share
    topics[community] = community_df[is_real_topic & is_large_enough]

    kept_frames.append(topics[community])

# Stack all frames in a single concat instead of growing total_df inside the
# loop: that copied the accumulated rows on every iteration and relied on
# concatenating with an empty seed DataFrame.
total_df = pd.concat(kept_frames, ignore_index=True) if kept_frames else pd.DataFrame()

In [12]:
# Total number of rows currently held across the frames in `topics`.
# Assumes the keys of `topics` are exactly "0", "1", ..., str(len(topics) - 1).
count = sum(len(topics[str(i)]) for i in range(len(topics)))
count

10

In [13]:
# Order rows by community, then by topic id, and renumber them 0..n-1.
# The community ids come from JSON object keys and are therefore strings, so
# they are compared as integers here; a plain string sort would place "10"
# before "2". `key` is applied to each sort column separately, and 'Topic' is
# passed through unchanged.
total_df = total_df.sort_values(
    by=['community', 'Topic'],
    key=lambda column: column.astype(int) if column.name == 'community' else column,
).reset_index(drop=True)

In [14]:
# Print each row's keyword list next to its row position; the labelling cells
# that follow address rows by these positions.
for position, keywords in enumerate(total_df['Representation']):
    print(position, keywords)

0 ['israel', 'hamas', 'have', 'people', 'palestinians', 'jews', 'gaza', 'war', 'say', 'want']
1 ['israel', 'hamas', 'have', 'people', 'palestinians', 'jews', 'say', 'think', 'gaza', 'state']
2 ['anti', 'zionism', 'jews', 'antisemitism', 'semitic', 'zionist', 'semitism', 'antisemitic', 'jewish', 'hate']
3 ['hamas', 'support', 'terrorist', 'oct', 'civilian', 'celebrate', 'thousand', 'isis', 'massacre', 'condemn']
4 ['hamas', 'destroy', 'goal', 'eliminate', 'action', 'leadership', 'group', 'terrorist', 'power', 'remove']
5 ['propaganda', 'side', 'say', 'provide', 'fact', 'read', 'lie', 'argument', 'source', 'see']
6 ['gaza', 'hamas', 'israel', 'want', 'people', 'murder', 'peace', 'palestinians', 'make', 'have']
7 ['israel', 'citizen', 'right', 'marriage', 'have', 'israeli', 'equal', 'israelis', 'class', 'fact']
8 ['land', 'palestine', 'jordan', 'jews', 'create', 'empire', 'buy', 'own', 'england', 'muslims']
9 ['islam', 'jews', 'muslims', 'jew', 'antisemitism', 'muslim', 'antisemitic', 're

In [15]:
# Placeholder label for row 0; if the 'interpretation' column does not exist
# yet, this assignment creates it.
total_df.loc[0, 'interpretation'] = "?"

In [16]:
# Hand-assigned interpretation for every row of total_df, in row order:
# the label at position k is written to row k of the sorted, re-indexed table.
#
# NOTE(review): the keyword list printed for row 6 contains 'murder' and
# 'peace', while the one for row 7 contains 'citizen', 'right', 'marriage'.
# Confirm that the labels for rows 6-8 are not shifted by one position.
interpretations = [
    "War",                          # 0
    "War, State Legitimacy",        # 1
    "(Anti) Zionism",               # 2
    "Hamas, Terrorist, Massacre",   # 3
    "Hamas, Terrorist",             # 4
    "Propaganda, Lie, Source",      # 5
    "State Legitimacy, Peace",      # 6
    "People, Murder, Peace",        # 7
    "State Legitimacy",             # 8
    "(Anti) Zionism",               # 9
]

for row, label in enumerate(interpretations):
    total_df.at[row, 'interpretation'] = label

In [17]:
# Put the identifying columns first, followed by every other column except
# 'Name' and 'Representative_Docs'. The remaining columns are read from
# total_df itself instead of topics[community], which only worked while the
# loop variable `community` was still bound from an earlier cell.
leading_columns = ['community', '#authors', '#posts', "interpretation"]
dropped_columns = ["Name", "Representative_Docs"]
total_df = total_df[
    leading_columns
    + [col for col in total_df.columns if col not in leading_columns + dropped_columns]
]
total_df

Unnamed: 0,community,#authors,#posts,interpretation,Topic,Count,Representation
0,1,21,10737,War,0,10660,"[israel, hamas, have, people, palestinians, je..."
1,2,18,5244,"War, State Legitimacy",0,5166,"[israel, hamas, have, people, palestinians, je..."
2,3,17,6749,(Anti) Zionism,0,493,"[anti, zionism, jews, antisemitism, semitic, z..."
3,4,18,5369,"Hamas, Terrorist, Massacre",0,468,"[hamas, support, terrorist, oct, civilian, cel..."
4,5,22,5394,"Hamas, Terrorist",0,300,"[hamas, destroy, goal, eliminate, action, lead..."
5,6,15,1404,"Propaganda, Lie, Source",0,341,"[propaganda, side, say, provide, fact, read, l..."
6,6,15,1404,"State Legitimacy, Peace",1,102,"[gaza, hamas, israel, want, people, murder, pe..."
7,6,15,1404,"People, Murder, Peace",2,83,"[israel, citizen, right, marriage, have, israe..."
8,6,15,1404,State Legitimacy,3,73,"[land, palestine, jordan, jews, create, empire..."
9,7,8,3923,(Anti) Zionism,0,345,"[islam, jews, muslims, jew, antisemitism, musl..."


In [18]:
# Export the post-war table as LaTeX source, without the row index.
with open('table_1007_1231.tex', 'w') as tex_file:
    latex_source = total_df.to_latex(index=False)
    tex_file.write(latex_source)