# Markov Model Design
This code reads in each of the datasets created by our web scraper and concatenates them into one large data frame. The data frame is then normalized within each origin category to produce the transition probabilities for a Markov simulation.

In [1]:
import time
import pandas as pd
import numpy as np
import scipy.linalg as la
import random
# Display settings: render pandas floats with ten fixed decimals, and stop
# NumPy from switching to scientific notation when printing arrays.
pd.options.display.float_format = lambda value: '%.10f' % value
np.set_printoptions(suppress=True)

In [2]:
#sets an ideology per channel
ideology = pd.read_csv("Hard Tagged Channels.csv")
# Keep only the two lookup columns and drop rows where either one is missing.
# (Comparing against pd.NA with != is True for every cell, so it never filters
# anything; dropna() is the operation that actually removes missing rows.)
# Rows with a missing CHANNEL_TITLE must go because merge matches NaN keys to
# NaN keys, which would attach an ideology to untitled channels.
ideology = ideology[["CHANNEL_TITLE","IDEOLOGY"]].dropna()
ideology['IDEOLOGY'].value_counts()

IDW                        128
Alt-light                   97
Social Justice              84
Conspiracy                  80
Partisan Right              52
Partisan Left               52
Alt-right                   44
Socialist                   36
Libertarian                 36
Anti-Theist                 29
MRA                         18
Religious Conservative      13
Revolutionary Socialist     10
Anti-white                   3
Name: IDEOLOGY, dtype: int64

In [12]:
#builds the impression-weighted transition counts used for the Markov matrix
test = pd.read_csv("combined counter.csv").drop(columns = "Unnamed: 0")

#sets categories for current channel
test = pd.merge(test, ideology, how = "left", left_on = "Current Channel", right_on = "CHANNEL_TITLE")
test = test.rename(columns = {"IDEOLOGY": "Current Ideology"}).drop(columns = ["CHANNEL_TITLE", "Category"])

#sets categories into Next channel and removes others
test = pd.merge(test, ideology, how = "left", left_on = "Next Channel", right_on = "CHANNEL_TITLE")
test = test.rename(columns = {"IDEOLOGY": "Next Ideology", "Count": "Amount"})
test = test[["Current Ideology", "Next Ideology", "Amount", "Current Views"]]
# Only the ideology labels get the "Others" placeholder. Filling the numeric
# columns with a string would turn a missing count/view value into text and
# corrupt the multiplication below; leaving them as NaN lets sum() skip them.
test = test.fillna({"Current Ideology": "Others", "Next Ideology": "Others"})
# Transitions into untagged channels are discarded, so the probabilities
# computed later are conditional on landing on a tagged channel.
test = test[test["Next Ideology"] != "Others"].reset_index(drop = True)

#creates the impressions: views of the current video times the number of times the next channel appeared
test = (
    test.assign(Impressions = test["Current Views"] * test["Amount"])
        .groupby(["Current Ideology", "Next Ideology"], as_index = False)["Impressions"]
        .sum()
)
test


Unnamed: 0,Current Ideology,Next Ideology,Amount,Current Views
0,Alt-light,Alt-light,3,84226
1,Alt-light,Partisan Left,2,84226
2,Alt-light,Alt-light,1,14405
3,Alt-light,Social Justice,2,14405
4,Alt-light,Partisan Left,1,14405
...,...,...,...,...
3864,Socialist,Conspiracy,1,75753
3865,Socialist,Socialist,2,6247
3866,Socialist,Socialist,2,6247
3867,Socialist,Partisan Left,1,6247


In [4]:
# Divide each transition's impressions by the total impressions leaving the
# same current ideology, giving a per-origin transition probability.
origin_totals = test.groupby("Current Ideology")["Impressions"].transform("sum")
test = test.assign(Probability = test["Impressions"] / origin_totals).drop(columns = "Impressions")
test

Unnamed: 0,Current Ideology,Next Ideology,Probability
0,Alt-light,Alt-light,0.6631098735
1,Alt-light,Conspiracy,0.1037438787
2,Alt-light,IDW,0.0410371235
3,Alt-light,Libertarian,0.0347588694
4,Alt-light,MRA,0.0000739129
...,...,...,...
108,Socialist,Partisan Left,0.0507189392
109,Socialist,Partisan Right,0.0014392062
110,Socialist,Revolutionary Socialist,0.0017022585
111,Socialist,Social Justice,0.1208246799


In [8]:
#Creates the Markov Matrix
# Rows are the current ideology, columns the next ideology, and each cell is the
# transition probability. A single pivot replaces the per-topic
# transpose/append loop (DataFrame.append no longer exists in pandas 2.x) and
# keeps the matrix in a numeric dtype.
markovmatrix = (
    test.pivot(index = "Current Ideology", columns = "Next Ideology", values = "Probability")
        .fillna(0)                                   # unobserved transitions have probability 0
        .sort_index(axis = 1)                        # alphabetical column order
        .drop(index = "Others", errors = "ignore")   # untagged origins are excluded; tolerate their absence
        .rename_axis(index = None, columns = None)   # unnamed axes, so a CSV export keeps its "Unnamed: 0" index column
)
markovmatrix
#markovmatrix.to_csv("Markov Matrix without Others.csv")

Unnamed: 0,Alt-light,Alt-right,Anti-Theist,Anti-white,Conspiracy,IDW,Libertarian,MRA,Partisan Left,Partisan Right,Religious Conservative,Revolutionary Socialist,Social Justice,Socialist
Alt-light,0.6631098735,0.0,0.0,0.0,0.1037438787,0.0410371235,0.0347588694,7.39129e-05,0.0390835587,0.1096868148,0.0,0.0,0.0079596261,0.0005463423
Alt-right,0.0012815676,0.2505349017,0.0,0.0,0.0023441966,0.1438045466,0.5344498939,0.0,0.0079914512,0.0074172368,0.0,0.0,0.0520979379,7.82675e-05
Anti-Theist,0.0,0.0,0.8759086586,0.0,0.0061586314,0.0094117244,0.0,0.0,0.0718145195,0.0028025127,0.0,0.0,0.0339039533,0.0
Anti-white,0.0005384772,0.0,0.0,0.3410279165,0.0,0.0,0.0,0.0,0.4863300289,0.0379745915,0.0,0.0,0.1341289859,0.0
Conspiracy,0.0,0.0,0.0,0.0025380078,0.6491394375,0.0115997967,0.0995763308,0.0,0.037167784,0.1740387189,0.0,0.0,0.0251752556,0.0007646688
IDW,0.0231807989,0.0,0.0081702689,0.0,0.0097457597,0.9379670184,0.0051567176,0.0,0.0064821166,0.0007958597,0.0,0.0,0.0066207374,0.0018807229
Libertarian,0.0,0.0,0.0,0.0,0.0,0.1296005608,0.8127525475,0.0,0.0143291237,0.0219444303,0.0,0.0,0.0175002796,0.0038730581
MRA,0.0114460749,0.0,0.0,0.0,0.0843006403,0.1761954746,0.041284816,0.494835464,0.0239259826,0.0382537344,0.0,0.0,0.1297578131,0.0
Partisan Left,0.0,0.0,0.0,0.0,0.0034488437,0.0,9.60967e-05,0.0,0.8401660429,0.0279191493,0.0,0.0,0.1283683678,1.4995e-06
Partisan Right,0.0009944246,0.0,0.0,0.0,0.0011821953,7.35985e-05,0.1146161799,0.0,0.0574713277,0.8208875558,0.0,0.0,0.0047747182,0.0


In [11]:
#Markov Steady State Simulation
#matrix = pd.read_csv("Markov Matrix without Others.csv").drop(columns = "Unnamed: 0")

# Take the state labels from the matrix itself instead of a hard-coded list, so
# the result is always labelled with the states that were actually simulated.
states = list(markovmatrix.columns)
# The repeated multiplication below only makes sense when row i and column i
# refer to the same state; fail loudly instead of producing mislabelled numbers.
if list(markovmatrix.index) != states:
    raise ValueError("markovmatrix must have identical row and column labels in the same order")

N_ITERATIONS = 100  # number of transition steps applied to the starting distribution
transition = markovmatrix.to_numpy(dtype = float)

# Start from a uniform distribution over all states (column vector).
vec = np.full((len(states), 1), 1/len(states))

# Rows of the matrix are origins, so one step of the chain is P^T · v.
for _ in range(N_ITERATIONS):
    vec = np.dot(np.transpose(transition), vec)

probdf = pd.DataFrame(vec, index = states, columns = ["Steady State Probability"])
probdf
#probdf.to_csv("Steady State Probability without Others.csv")

Unnamed: 0,Steady State Probability
Alt-light,0.0108586565
Alt-right,0.0
Anti-Theist,0.0096345887
Anti-white,5.64173e-05
Conspiracy,0.0127135811
IDW,0.1445220407
Libertarian,0.0467458365
MRA,1.5925e-06
Partisan Left,0.1345514062
Partisan Right,0.0533000004


This model is weighted by video views, so it shows which video types more people are exposed to.