In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import geopandas as gpd
import utils
from utils import util_functions

In [39]:
# Locations of the two input tables (relative to the notebook's working directory).
df_path = 'dataset/state_media_on_social_media_platforms.xlsx'
twitter_dir_path = 'dataset/twitter_accounts_info.csv'


def _load_json(path):
    """Return the parsed contents of the JSON file at `path`.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


# Twitter account table, indexed by username.
twi_df = pd.read_csv(twitter_dir_path, index_col='username')

# Main dataset, indexed by English name, then passed through the project's preprocessor.
df = pd.read_excel(df_path, index_col='Name (English)')
df = util_functions.data_preprocessor(df)
# Keep the index values available as a regular column as well.
df['Name (English)'] = list(df.index)

# Country reference data used by the focus-count helper in a later cell.
recognized_countries = _load_json('assets/recognized_countries.json')
equivalent_countries_dict = _load_json('assets/equivalent_countries.json')

## Preprocess `twi_df` and merge it with `df`

In [40]:
# Clean the Twitter account table: largest accounts first, the handle (currently
# the index) exposed as a column to merge on, and unused columns dropped.
twi_df = twi_df.sort_values('followers_count', ascending=False)
twi_df['X (Twitter) handle'] = list(twi_df.index)
twi_df = twi_df.drop(columns=['user_id', 'description'])

# The tiny epsilon keeps the ratio finite for accounts whose following_count is 0.
twi_df['followers_following_ratio'] = twi_df['followers_count'] / (twi_df['following_count'] + 1e-8)
twi_df = twi_df.drop_duplicates()

# Parse the raw timestamp strings (the '+0000' offset is matched literally),
# then keep a date-only string version in a separate column.
twi_df['created_at'] = pd.to_datetime(twi_df['created_at'], format='%a %b %d %H:%M:%S +0000 %Y')
twi_df['formatted_created_at'] = twi_df['created_at'].dt.strftime('%Y-%m-%d')

# Outer merge keeps rows that appear in only one of the two tables.
# NOTE(review): this rebinds `df`, so re-running the cell merges a second time —
# restart the kernel and run all cells instead of re-executing it.
df = pd.merge(df, twi_df, on='X (Twitter) handle', how='outer')

In [41]:
# The project helper returns a pair: a per-country count dictionary and a
# replacement for `df`.
# NOTE(review): the 'focus group' column read by later cells is presumably
# added by this call — confirm in utils.util_functions.
country_focus_count_dict, df = util_functions.find_country_focus_count_dictionary(df, recognized_countries, equivalent_countries_dict)

In [42]:
# Show the first row as a quick check of the columns present after the merge.
df.head(1)

Unnamed: 0,Region of Focus,Language,Entity owner (English),Parent entity (English),X (Twitter) handle,X (Twitter) URL,X (Twitter) Follower #,Facebook page,Facebook URL,Facebook Follower #,...,TikTok Subscriber #,Name (English),is_blue_verified,created_at,followers_count,following_count,tweet_num,followers_following_ratio,formatted_created_at,focus group
0,Anglosphere,English,China Media Group (CMG),Central Publicity Department,_bubblyabby_,https://twitter.com/_bubblyabby_,1678.0,itsAbby-103043374799622,https://www.facebook.com/itsAbby-103043374799622,1387432.0,...,660.0,Yang Xinmeng (Abby Yang),True,2014-07-10 02:05:08,2799.0,161.0,851.0,17.385093,2014-07-10,"[Australia, Canada, New Zealand, United Kingdo..."


In [49]:
# Map each country found in a row's 'focus group' to the distinct account names,
# entity owners and parent entities of the rows that list it.
# Output key -> source column in `df`.
account_fields = {
    'name': 'Name (English)',
    'owner': 'Entity owner (English)',
    'parent': 'Parent entity (English)',
}

# Values are collected in dicts used as insertion-ordered sets, so every
# membership check is a hash lookup instead of a scan over a growing list.
seen_by_country = {}
account_rows = df[['focus group', *account_fields.values()]].itertuples(index=False, name=None)
for country_list, *record in account_rows:
    for country in country_list:
        seen = seen_by_country.setdefault(country, {field: {} for field in account_fields})
        for field, value in zip(account_fields, record):
            # setdefault keeps the first-seen position of a repeated value.
            seen[field].setdefault(value, None)

# Convert back to plain lists (first-seen order) so the mapping can be dumped to JSON.
# Missing values (NaN) are kept as-is, exactly as they appear in `df`.
country_to_accounts_dict = {
    country: {field: list(values) for field, values in fields.items()}
    for country, fields in seen_by_country.items()
}

In [50]:
# Persist the country -> accounts mapping as JSON under findings/.
with open('findings/country_corps.json', 'w') as out_file:
    json.dump(country_to_accounts_dict, out_file)

In [53]:
# Read the saved mapping back from disk into `data`.
with open('findings/country_corps.json', 'r') as file:
    data = json.load(file)


{'Australia': {'name': ['Yang Xinmeng (Abby Yang)', 'CGTN Culture Express', 'Yang Sheng', 'Media Challengers', 'Ge Anna', 'Wang Zeyu (Anosi Wang)', 'We Are China', 'CGTN Assignment Asia', 'Beijing Channel', 'Discover Beijing', 'Belt and Road News Network', 'Zheng Chenlei (Blair Zheng)', 'Tang Bo', 'Wang Cong', 'Cao Li', 'Visual China', 'China Watch', 'CD Opinion', 'China Daily Sci-tech', 'Cen Ziyuan (Jasmine Cen)', 'CGTN Culture', 'CGTN Dialogue', 'CGTN Inheritors', 'CGTN Style', 'CGTN Business', 'CGTN Food', 'CGTN Global Business', 'CGTN Graphics', 'CGTN Movies', 'CGTN', 'CGTN Radio', 'CGTN Sports', 'CGTN Sports Scene', 'CGTN Stories', 'CGTN Tech', 'CGTN Travel', 'CGTN Travelogue', 'CGTN United Nations', 'Chen Juan', 'Chen Weihua', 'China Focus', 'Lin Jing', 'CGTN China Takeaway', 'Chinese Consulate General in Sydney', 'China Culture', 'China Daily', 'China Daily Life', 'China Daily Asia', 'China Daily World', 'China.org.cn (English)', 'China Plus Podcasts', "People's Daily China Scie

In [51]:
# Display the in-memory country -> accounts mapping.
# NOTE(review): this prints the whole dictionary; consider showing only a small sample.
country_to_accounts_dict

{'Australia': {'name': ['Yang Xinmeng (Abby Yang)',
   'CGTN Culture Express',
   'Yang Sheng',
   'Media Challengers',
   'Ge Anna',
   'Wang Zeyu (Anosi Wang)',
   'We Are China',
   'CGTN Assignment Asia',
   'Beijing Channel',
   'Discover Beijing',
   'Belt and Road News Network',
   'Zheng Chenlei (Blair Zheng)',
   'Tang Bo',
   'Wang Cong',
   'Cao Li',
   'Visual China',
   'China Watch',
   'CD Opinion',
   'China Daily Sci-tech',
   'Cen Ziyuan (Jasmine Cen)',
   'CGTN Culture',
   'CGTN Dialogue',
   'CGTN Inheritors',
   'CGTN Style',
   'CGTN Business',
   'CGTN Food',
   'CGTN Global Business',
   'CGTN Graphics',
   'CGTN Movies',
   'CGTN',
   'CGTN Radio',
   'CGTN Sports',
   'CGTN Sports Scene',
   'CGTN Stories',
   'CGTN Tech',
   'CGTN Travel',
   'CGTN Travelogue',
   'CGTN United Nations',
   'Chen Juan',
   'Chen Weihua',
   'China Focus',
   'Lin Jing',
   'CGTN China Takeaway',
   'Chinese Consulate General in Sydney',
   'China Culture',
   'China Daily',
 

## Country focus: news-channel count map preparation

In [3]:
# NOTE(review): this repeats the helper call made in an earlier cell, and its
# execution count (In [3]) is out of sequence with the cells before it —
# confirm whether the second call is needed after a Restart & Run All.
country_focus_count_dict, df = util_functions.find_country_focus_count_dictionary(df, recognized_countries, equivalent_countries_dict)

  country_list = df['focus group'][idx]


## Name, Owner, Parent Graph

In [4]:
# Hand `df` to the project's graph helper; the return value is not captured.
# NOTE(review): presumably renders or saves the name/owner/parent graph — confirm in utils.util_functions.
util_functions.create_name_owner_parent_graph(df)

## Rest