# Markov Model Design
This code reads in each one of the datasets created by our webscraper and concatenates them into one large data frame. The data frame is then normalized for each origin category to create a markov simulation

In [63]:
import time
import pandas as pd
import numpy as np
import scipy.linalg as la
import random
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', lambda x: '%.10f' % x)

In [2]:
#sets an ideology per channel
ideology = pd.read_csv("Hard Tagged Channels.csv")
ideology = ideology[ideology[["CHANNEL_TITLE","IDEOLOGY"]] != np.nan][["CHANNEL_TITLE","IDEOLOGY"]]
ideology['IDEOLOGY'].value_counts()

IDW                        128
Alt-light                   97
Social Justice              84
Conspiracy                  80
Partisan Right              52
Partisan Left               52
Alt-right                   44
Libertarian                 36
Socialist                   36
Anti-Theist                 29
MRA                         18
Religious Conservative      13
Revolutionary Socialist     10
Anti-white                   3
Name: IDEOLOGY, dtype: int64

In [66]:
#creates the Markov Probability matrix
test = pd.read_csv("combined tags.csv").drop(columns = "Unnamed: 0")

#sets categories for current channel
test = test.rename({"Category":"Current Tag"}, axis = 1)

#sets categories into Next channel and removes others
test = test.rename({"TAGS":"Next Tag"}, axis = 1)[["Current Tag","Next Tag","Count","Current Views"]].fillna("Others")
test = test.rename({"Count":"Amount"}, axis = 1)
test = test[test["Next Tag"] != "Others"].reset_index().drop(columns = "index")

#creates the impiressions
test["Impressions"] = test["Current Views"]*test["Amount"]
test = test.drop(columns = ["Amount","Current Views"]).groupby(['Current Tag','Next Tag']).sum().reset_index()
test


Unnamed: 0,Current Tag,Next Tag,Impressions
0,AntiSJW,AIN,106360.0000000000
1,AntiSJW,AntiSJW,10415722.0000000000
2,AntiSJW,AntiTheist,3891964.0000000000
3,AntiSJW,Conspiracy,34311090.0000000000
4,AntiSJW,Educational,323834.0000000000
...,...,...,...
288,WhiteIdentitarian,SocialJustice,827871.0000000000
289,WhiteIdentitarian,Socialist,2896.0000000000
290,WhiteIdentitarian,StateFunded,1381317.0000000000
291,WhiteIdentitarian,TV,1641450.0000000000


In [67]:
test = pd.merge(test,test.groupby("Current Tag")['Impressions'].sum(), how = "left",left_on = "Current Tag",right_on = "Current Tag")
test["Probability"] = test["Impressions_x"]/test["Impressions_y"]
test = test.drop(columns = ["Impressions_x","Impressions_y"])
test

Unnamed: 0,Current Tag,Next Tag,Probability
0,AntiSJW,AIN,0.0014016091
1,AntiSJW,AntiSJW,0.1372580918
2,AntiSJW,AntiTheist,0.0512881922
3,AntiSJW,Conspiracy,0.4521505798
4,AntiSJW,Educational,0.0042674754
...,...,...,...
288,WhiteIdentitarian,SocialJustice,0.0287083236
289,WhiteIdentitarian,Socialist,0.0001004254
290,WhiteIdentitarian,StateFunded,0.0479003316
291,WhiteIdentitarian,TV,0.0569210394


In [69]:
#Creates the Markov Matrix
#test = test.groupby(["Current Ideology","Next Ideology"]).sum().reset_index()
topics = test["Current Tag"].unique()
markovmatrix = pd.DataFrame()
for topic in topics:
    temp = test[test["Current Tag"] == topic].transpose().rename(columns = test["Next Tag"], index = {"Probability":topic}).iloc[2:]
    markovmatrix = markovmatrix.append(temp)
markovmatrix = markovmatrix.fillna(0)
markovmatrix = markovmatrix.reindex(sorted(markovmatrix.columns),axis = 1)#.drop(index = "Others")
markovmatrix
#markovmatrix.to_csv("Markov Matrix without Others.csv")

Unnamed: 0,AIN,AntiSJW,AntiTheist,AntiWhiteness,Conspiracy,Educational,LateNightTalkShow,Libertarian,MRA,Mainstream News,...,PartisanLeft,PartisanRight,Provocateur,ReligiousConservative,Revolutionary,SocialJustice,Socialist,StateFunded,TV,WhiteIdentitarian
AntiSJW,0.0014016091,0.1372580918,0.0512881922,0.0,0.4521505798,0.0042674754,0.0,0.0518215179,0.0010885664,0.0697111946,...,0.0025960605,0.1024481891,0.0005867756,0.0118906422,0.0,0.002440086,0.0,0.0011521105,0.0321226243,0.0034810667
AntiTheist,0.0080047647,0.0833488749,0.7963373549,0.0,0.0050694681,0.0277694575,0.0,7.27076e-05,0.0,0.0001932979,...,0.0161082024,0.002873749,0.0,0.0005652672,0.0,0.0076673936,0.0077676768,0.0033391579,0.0037837229,0.0
AntiWhiteness,0.0,0.0,0.0,0.2118056464,0.0,0.0,0.0159428343,0.0,0.0,0.1379129752,...,0.1453893887,0.0540372788,0.1536744519,0.0,0.0,0.085583195,0.0,0.0032847332,0.1881447307,0.0
Conspiracy,0.0040147342,0.0003268456,0.0,0.0,0.2229290553,0.0003451866,0.0,0.3304095934,0.0,0.0945153482,...,0.0047203327,0.182873595,0.0008446255,0.0037325065,0.0,0.0368669519,0.0,0.0481664049,0.0643282156,0.0
Educational,9.09056e-05,0.0004990516,0.0014553655,0.0,0.0,0.6818954334,0.0220970286,5.6724e-05,0.0,0.0114996658,...,0.0107384734,0.0003236447,0.0,0.0001334135,0.0,0.0115277152,6.7955e-06,0.0005785647,0.0157219683,0.0
LateNightTalkShow,0.0,0.0,0.0,0.0,0.0,0.0,0.278421863,0.0,0.0,0.0047266134,...,0.2440216832,5.49812e-05,0.0,0.0,0.0,0.2896217117,0.0,0.0,0.1828102472,0.0
Libertarian,0.0013307468,0.0251094262,0.0,0.0,0.0,0.0002441013,0.0,0.4598733047,0.0,0.085666705,...,0.0168246025,0.2118844571,0.0,0.0075825092,0.0,5.7856e-05,0.0,0.0058575645,0.1221823961,0.0
MRA,0.0019448455,0.2270318809,0.0,0.0,0.0303702672,0.014365851,0.0036164145,0.1157349073,0.1647571612,0.0785778611,...,0.0606133898,0.0338590401,0.0,0.0211065041,0.0,0.0721693348,0.0,0.0848448284,0.0806314604,0.0
Mainstream News,0.0,0.0,0.0,0.0301806102,0.0,0.0083285488,0.0136719441,0.0,0.0,0.4266383478,...,0.0502707526,0.0424939492,0.0,0.0,0.0,0.0390032816,0.0,0.0388304592,0.3408730503,0.0
ManoelAltLite,0.0193210336,0.0908549821,0.0,0.001488541,0.0108322886,0.0008699354,0.001386592,0.1521380561,0.0,0.0765270833,...,0.0293640663,0.2444200702,0.000171292,0.0113319269,0.0,0.0826193717,0.0,0.0022516879,0.1182210565,0.001137615


In [6]:
#Markov Steady State Simulation
#matrix = pd.read_csv("Markov Matrix without Others.csv").drop(columns = "Unnamed: 0")
vec = np.zeros((len(markovmatrix),1))
vec[0:len(markovmatrix)] = 1/len(markovmatrix)

for _ in range(100):
    vec = np.dot(np.transpose(markovmatrix),vec)

probdf = pd.DataFrame(vec, index = ['PartisanRight','Conspiracy','PartisanLeft','SocialJustice',
 'Libertarian','AntiSJW', 'MissingLinkMedia','WhiteIdentitarian','AIN',
 'ManoelAltRight','MRA','ReligiousConservative','ManoelIDW','Provocateur',
 'ManoelAltLite','Socialist','Mainstream News','TV', 'AntiTheist', 'StateFunded',
 'Educational', 'LateNightTalkShow', 'Revolutionary', 'AntiWhiteness',
 'Politician'])
probdf = probdf.rename({0:"Steady State Probability"},axis = 1)
probdf
#probdf.to_csv("Steady State Probability without Others.csv")

Unnamed: 0,Steady State Probability
Alt-light,0.0121494791
Alt-right,0.0
Anti-Theist,0.0105913078
Anti-white,5.86406e-05
Conspiracy,0.0133317371
IDW,0.1590053198
Libertarian,0.0522580782
MRA,1.7808e-06
Partisan Left,0.129394042
Partisan Right,0.044289455


This model is scaled taking into accounts of video views, thus it shows what video types more people are exposed towards

In [74]:
uniquetags = ['PartisanRight','Conspiracy','PartisanLeft','SocialJustice',
 'Libertarian','AntiSJW', 'MissingLinkMedia','WhiteIdentitarian','AIN',
 'ManoelAltRight','MRA','ReligiousConservative','ManoelIDW','Provocateur',
 'ManoelAltLite','Socialist','Mainstream News','TV', 'AntiTheist', 'StateFunded',
 'Educational', 'LateNightTalkShow', 'Revolutionary', 'AntiWhiteness',
 'Politician']


temp = pd.read_csv("combined tags.csv").drop(columns = "Unnamed: 0")

for tag in uniquetags:
    if not (tag in temp["Category"].unique()):
        print(tag)

AIN
ManoelIDW
TV
Revolutionary
Politician


In [60]:
#temp[temp["Category"] == "Anti-white"]