## **0. Import Necessary Libraries**

In [61]:
pip install zemberek-python

Collecting zemberek-python
[?25l  Downloading https://files.pythonhosted.org/packages/df/4f/e006418720e5764a302fd4cc048d8b3003bc80cc2317cdc62254fce3abe6/zemberek_python-0.1.2-py3-none-any.whl (93.6MB)
[K     |████████████████████████████████| 93.6MB 98kB/s 
[?25hCollecting antlr4-python3-runtime>=4.8
[?25l  Downloading https://files.pythonhosted.org/packages/04/9c/d5ef93dc1e5a862cae004a64d15425c2a1ae8ba967a08f03dfb11aedf7bf/antlr4-python3-runtime-4.9.2.tar.gz (117kB)
[K     |████████████████████████████████| 122kB 40.8MB/s 
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l[?25hdone
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.2-cp37-none-any.whl size=144568 sha256=acf7c42425f54144de38c7a7733adab10f4c81406e41af2af1ffd63a07fd3ed7
  Stored in directory: /root/.cache/pip/wheels/c6/64/ac/8c89516f9cc7341328d7e4a896d2166514798ee24b753f0ca3
Successfully built antlr4-python3-r

In [1]:
import re
import string as str
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from zemberek import (
    TurkishSpellChecker,
    TurkishSentenceNormalizer,
    TurkishSentenceExtractor,
    TurkishMorphology,
    TurkishTokenizer)



## **1. Load DataFrame**

In [None]:
# Read the tweet table from disk and discard the 'Unnamed: 0' column
# (presumably an index saved by an earlier to_csv call; confirm).
csv_path = r'D:\Users\suuser\Desktop\Cesitli\SICSS\data\df_6.csv'
df = pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])

## **2. Define Media Channels' Usernames**

In [107]:
# Usernames of the media channels whose tweets are kept for the analysis.
media_channels = [
    "TurkishIndy",
    "Irna_Turkish",
    "aawsat_turkce",
    "XHTurkey",
    "AlMonitorTurkce",
    "RudawTurkce",
    "CRI_Turkish",
    "sputnik_TR",
    "euronews_tr",
    "AJTurk",
    "dw_turkce",
    "bbcturkce",
    "VOATurkish",
]

In [52]:
# Usernames of a second group of media channels. Each username is listed once
# (a repeated "trthaber" entry was removed).
tr_media_channels = ["Ahaber", "trthaber", "anadoluajansi", "cnnturk", "ihacomtr", "dhainternet", "Sabah",
                     "Hurriyet", "milliyet", "gazetesozcu", "cumhuriyetgzt", "halktvcomtr", "FOXhaber", "BirGun_Gazetesi",
                     "Haberturk", "ntv", "haber7", "Stargazete", "yenisafak", "turkiyegazetesi", "takvim"]

## **3. Preprocess Data**

### **3.1. Remove Emojis From Tweets**

In [108]:
def deEmojify(text):
    """Return ``text`` with each run of emoji characters replaced by one space.

    Only code points inside the four ranges listed below are treated as
    emojis; every other character is left untouched.
    """
    emoji_ranges = (
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    )
    # One or more consecutive emoji characters collapse into a single space.
    return re.sub("[" + emoji_ranges + "]+", " ", text, flags=re.UNICODE)

### **3.2. Tokenize & Clean the Text**

In [66]:
zemberek_tokenizer = TurkishTokenizer.DEFAULT


def text_preprocess(text):
    """Clean ``text`` and return it as a list of whitespace-separated words.

    Steps: replace emojis via ``deEmojify``, tokenize with
    ``zemberek_tokenizer`` keeping only tokens whose type name is
    'WordWithSymbol', 'Word' or 'Punctuation', remove every character that is
    neither a word character nor whitespace, then split on whitespace.
    """
    kept_token_types = ('WordWithSymbol', 'Word', 'Punctuation')

    without_emojis = deEmojify(text)

    kept_contents = []
    for token in zemberek_tokenizer.tokenize(without_emojis):
        if token.type_.name in kept_token_types:
            kept_contents.append(token.content)
    joined = " ".join(kept_contents)

    # Drop everything that is not a word character or whitespace.
    words_only = re.sub(r'[^\w\s]', '', joined)
    # Collapse runs of spaces left behind by the removed characters.
    single_spaced = re.sub(' +', ' ', words_only)
    return single_spaced.split()

### **3.3. Tag Each Document**

In [109]:
def _turkish_lower(text):
    """Lower-case ``text`` using Turkish rules for dotted and dotless I.

    Python's locale-independent ``lower()`` maps "I" to "i" and "İ" to "i"
    followed by a combining dot above (U+0307). Turkish expects "I" -> "ı" and
    "İ" -> "i", so those two letters are mapped by hand before ``lower()``
    handles the rest.
    """
    return text.replace("İ", "i").replace("I", "ı").lower()


# Keep only tweets written by the selected media channels. Rows whose text is
# missing are skipped because they cannot be lower-cased or tokenized.
media_rows = df[df["user_username"].isin(media_channels) & df["text"].notna()]

# One TaggedDocument per tweet: the words are the preprocessed tweet text and
# the single tag is the author's username.
tagged_documents = [
    TaggedDocument(text_preprocess(_turkish_lower(tweet_text)), [username])
    for username, tweet_text in zip(media_rows["user_username"], media_rows["text"])
]

## **4. Create a Model**

In [None]:
# Doc2Vec training is stochastic. A fixed seed makes the initial vectors
# repeatable; gensim's documentation states that a fully deterministic run also
# needs a single worker thread (workers=1) and, on Python 3, a fixed
# PYTHONHASHSEED environment variable.
SEED = 42
model = Doc2Vec(tagged_documents, vector_size=20, min_count=3, epochs=15, seed=SEED, workers=1)

## **5. Network Representation**

In [111]:
# Factor applied to the inverse distance, and the value the scaled weight must
# exceed for an edge to be added to the graph.
WEIGHT_SCALE = 10
MIN_EDGE_WEIGHT = 0.5

G = nx.Graph()
to_observe = list()

# Look the tags and their vectors up once instead of on every iteration.
doc_labels = model.docvecs.index2entity
doc_vectors = model.docvecs.vectors_docs

for index_1, label_1 in enumerate(doc_labels):
    for index_2, label_2 in enumerate(doc_labels):
        if index_1 == index_2:
            continue

        # Euclidean distance between the two document vectors, converted to a
        # plain Python float so the stored weight is not a NumPy scalar when
        # the graph is later serialised.
        distance = float(np.linalg.norm(doc_vectors[index_1] - doc_vectors[index_2]))

        # Similarity is the inverse distance; identical vectors give infinity
        # without triggering a NumPy divide-by-zero warning.
        if distance == 0:
            scaled_weight = float("inf")
        else:
            scaled_weight = (1 / distance) * WEIGHT_SCALE

        if scaled_weight > MIN_EDGE_WEIGHT:
            G.add_edge(label_1, label_2, weight=scaled_weight)

        # Every ordered pair is recorded, whether or not it became an edge.
        to_observe.append((label_1, label_2, scaled_weight))

In [112]:
# Export the graph to a GML file.
gml_output_path = '/content/news_channels.gml'
nx.write_gml(G, gml_output_path)