# Markov Model Design
This code reads in each one of the datasets created by our webscraper and concatenates them into one large data frame. The data frame is then normalized for each origin category to create a markov simulation

In [5]:
import time
import pandas as pd
import numpy as np
import scipy.linalg as la
import random
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', lambda x: '%.10f' % x)

In [6]:
#sets a tag per channel
tags = pd.read_csv("Hard Tagged Channels.csv")
content = tags["TAGS"].to_list()
content = [tag.strip('[]""').split(",") for tag in content]
content = [tag[0].strip('""') for tag in content]
tags["TAGS"] = content
tags = tags[["CHANNEL_TITLE","TAGS"]]
tags[tags["TAGS"] == ""] = "Others"
tags

Unnamed: 0,CHANNEL_TITLE,TAGS
0,RedPill78,PartisanRight
1,The Daily Beast,PartisanLeft
2,Others,Others
3,Mercatus Center,Libertarian
4,Anything Goes,AntiSJW
...,...,...
798,Roosh V,ReligiousConservative
799,Daily Caller,PartisanRight
800,Rebel News,PartisanRight
801,Randy Rainbow,SocialJustice


In [9]:
#creates the Markov Probability matrix
dflist = ['PartisanRight', 'PartisanLeft', 'Libertarian',
       'AntiSJW', 'WhiteIdentitarian', 'MRA', 'ReligiousConservative',
       'SocialJustice', 'Mainstream News', 'AntiTheist', 'StateFunded',
       'Conspiracy', 'Educational', 'Socialist', 'Provocateur',
       'LateNightTalkShow', 'ManoelAltRight', 'AntiWhiteness',
       'MissingLinkMedia', 'ManoelAltLite']
dflist = [category +" probability.csv" for category in dflist]

test = pd.DataFrame()

#Binds all the seperate csv files together
for df in dflist:
    test = test.append(pd.read_csv(df))
test = test.drop(columns = "Unnamed: 0")
#sets categories into Next channel and removes others
test = pd.merge(test,tags, how = "left", left_on = "Next Channel", right_on = "CHANNEL_TITLE")
test = test.rename({"TAGS":"Next Category"}, axis = 1)[["Category","Next Category","Count"]].fillna("Others")
test = test.rename({"Count":"Amount"}, axis = 1)
test = test[test["Next Category"] != "Others"]

In [4]:
test = pd.merge(test,test.groupby("Category")['Amount'].sum(), how = "left",left_on = "Category",right_on = "Category")
test["Probability"] = test["Amount_x"]/test["Amount_y"]
test = test.drop(columns = ["Amount_x","Amount_y"])

In [5]:
#Creates the Markov Matrix
test = test.groupby(["Category","Next Category"]).sum().reset_index()
topics = test["Category"].unique()
markovmatrix = pd.DataFrame()
for topic in topics:
    temp = test[test["Category"] == topic].transpose().rename(columns = test["Next Category"], index = {"Probability":topic}).iloc[2:]
    markovmatrix = markovmatrix.append(temp)
markovmatrix = markovmatrix.fillna(0)
markovmatrix["ManoelAltRight"] = 0.0
markovmatrix = markovmatrix.reindex(sorted(markovmatrix.columns),axis = 1)
markovmatrix.to_csv("Markov Matrix without Others.csv")

In [3]:
pd.read_csv("Markov Matrix without Others.csv")

Unnamed: 0.1,Unnamed: 0,AntiSJW,AntiTheist,AntiWhiteness,Conspiracy,Educational,LateNightTalkShow,Libertarian,MRA,Mainstream News,...,ManoelAltRight,MissingLinkMedia,PartisanLeft,PartisanRight,Provocateur,ReligiousConservative,SocialJustice,Socialist,StateFunded,WhiteIdentitarian
0,AntiSJW,0.1362815884,0.0009025271,0.0279783394,0.0,0.0297833935,0.0063176895,0.2572202166,0.0099277978,0.0090252708,...,0.0,0.0036101083,0.0496389892,0.392599278,0.0,0.0216606498,0.0225631769,0.0,0.0261732852,0.0063176895
1,AntiTheist,0.0834285714,0.5462857143,0.0,0.0,0.0308571429,0.1062857143,0.0548571429,0.0,0.0011428571,...,0.0,0.0034285714,0.1097142857,0.0365714286,0.0,0.0,0.0194285714,0.0,0.008,0.0
2,AntiWhiteness,0.0,0.0,0.3640350877,0.0,0.0065789474,0.0942982456,0.0065789474,0.0,0.0460526316,...,0.0,0.0065789474,0.3026315789,0.0416666667,0.0,0.0,0.0679824561,0.0,0.0635964912,0.0
3,Conspiracy,0.0516194332,0.0,0.0,0.0647773279,0.016194332,0.0010121457,0.2206477733,0.0,0.0101214575,...,0.0,0.0,0.0910931174,0.4848178138,0.0,0.0030364372,0.0374493927,0.0,0.0192307692,0.0
4,Educational,0.0504634398,0.0473738414,0.0,0.0,0.5736354274,0.035015448,0.0370751802,0.0,0.0144181256,...,0.0,0.0720906282,0.079299691,0.0082389289,0.0,0.0020597322,0.0679711637,0.0,0.0123583934,0.0
5,LateNightTalkShow,0.0,0.0,0.0,0.0,0.0,0.6426229508,0.0,0.0,0.0024590164,...,0.0,0.0,0.3467213115,0.0040983607,0.0,0.0,0.0032786885,0.0,0.0008196721,0.0
6,Libertarian,0.0435483871,0.0024193548,0.0161290323,0.0032258065,0.0217741935,0.0,0.6403225806,0.0,0.0072580645,...,0.0,0.0008064516,0.0475806452,0.1911290323,0.0,0.0008064516,0.014516129,0.0,0.010483871,0.0
7,MRA,0.1767838126,0.0031948882,0.0244941427,0.0,0.1001064963,0.0074547391,0.268370607,0.0990415335,0.0074547391,...,0.0,0.0031948882,0.0266240682,0.1799787007,0.0,0.0223642173,0.0766773163,0.0,0.0042598509,0.0
8,Mainstream News,0.0017226529,0.0,0.0,0.0,0.0215331611,0.1378122308,0.0129198966,0.0,0.1145564169,...,0.0,0.0,0.3496985357,0.1214470284,0.0,0.0,0.0180878553,0.0,0.2222222222,0.0
9,ManoelAltLite,0.141271443,0.0,0.0,0.0,0.015136226,0.0050454087,0.1614530777,0.0,0.0050454087,...,0.0,0.0080726539,0.0736629667,0.5166498486,0.0,0.0121089808,0.015136226,0.0,0.0413723512,0.0030272452


In [4]:
#Markov Steady State Simulation
matrix = pd.read_csv("Markov Matrix without Others.csv").drop(columns = "Unnamed: 0")
vec = np.zeros((len(matrix),1))
vec[0:len(matrix)] = 1/len(matrix)

for _ in range(100):
    vec = np.dot(np.transpose(matrix),vec)

probdf = pd.DataFrame(vec, index = ['PartisanRight', 'PartisanLeft', 'Libertarian',
       'AntiSJW', 'WhiteIdentitarian', 'MRA', 'ReligiousConservative',
       'SocialJustice', 'Mainstream News', 'AntiTheist', 'StateFunded',
       'Conspiracy', 'Educational', 'Socialist', 'Provocateur',
       'LateNightTalkShow', 'ManoelAltRight', 'AntiWhiteness',
       'MissingLinkMedia', 'ManoelAltLite'])
probdf = probdf.rename({0:"Steady State Probability"},axis = 1)
probdf
#probdf.to_csv("Steady State Probability without Others.csv")

Unnamed: 0,Steady State Probability
PartisanRight,0.037378666
PartisanLeft,0.0092627062
Libertarian,0.0066953365
AntiSJW,0.002635545
WhiteIdentitarian,0.0289233376
MRA,0.2034252498
ReligiousConservative,0.1795803763
SocialJustice,0.0004118812
Mainstream News,0.0146624741
AntiTheist,0.0


Since this model does not take in account of the views, if a viewer were to randomly select a recommended video, they are most likely to end up in educational or Men's Right Activist content.