# Indexer for Search Engine with Ranking

In [84]:
import pandas as pd 

## Import Websites from json file

In [85]:
websites = pd.read_json("data/table.json")
df = pd.DataFrame(websites)
df = df.reset_index()

## Create new Dictionary to store website on database with following schema

```
string: Website 
Website: {
  url: string,
  outgoingLinks: string[],
  incomingLinks: string[]
}
```

In [86]:
websites_dict = {}

for index, row in df.iterrows():
    url = row['url']
    if websites_dict.get(url) is None:
        websites_dict[url] = {
            "url": url,
            "outgoingLinks": list(set(row['metadata']['links'])),
            "outgoingLinksLen":len(row['metadata']['links']),
            "incomingLinks": [],
            "incomingLinksLen": 0 
        }

## Assign Incoming Links to the created dictionary

In [88]:
for website in websites_dict:
    links = websites_dict[website]["outgoingLinks"]
    for link in links:
        linkInDict = websites_dict.get(link)
        if linkInDict is not None:
            websites_dict[link]["incomingLinks"].append(website)
            websites_dict[link]["incomingLinksLen"] = websites_dict[link]["incomingLinksLen"] + 1

website_pd = pd.DataFrame.from_dict(websites_dict)
website_pd.head(30).T

Unnamed: 0,url,outgoingLinks,outgoingLinksLen,incomingLinks,incomingLinksLen
https://nepal.gov.np,https://nepal.gov.np,[https://nepal.gov.np:8443/NationalPortal/NP?s...,3,[],0
https://moha.gov.np,https://moha.gov.np,"[https://moha.gov.np/page/directive-framework,...",114,[],0
https://p1.gov.np,https://p1.gov.np,[https://moa.p1.gov.np/notice/general-notice/d...,100,[],0
http://p2.gov.np,http://p2.gov.np,"[https://madhesh.gov.np/node/152, https://madh...",26,[],0
http://p3.gov.np,http://p3.gov.np,[],0,[],0
...,...,...,...,...,...
http://www.ugcnepal.edu.np/,http://www.ugcnepal.edu.np/,"[http://www.ugcnepal.edu.np/division/5, http:/...",118,[https://nepal.gov.np:8443/NationalPortal/view...,6
http://www.wecs.gov.np/,http://www.wecs.gov.np/,[http://www.wecs.gov.np/storage/listies/June20...,56,[https://nepal.gov.np:8443/NationalPortal/view...,10
http://www.nmc.org.np/,http://www.nmc.org.np/,"[https://nmc.org.np/news, https://exam.nmc.org...",33,[https://nepal.gov.np:8443/NationalPortal/view...,4
http://nhrc.gov.np/,http://nhrc.gov.np/,[https://nhrc.gov.np/publication-category/byla...,79,[https://nepal.gov.np:8443/NationalPortal/view...,12


## Save Dictionary Data to JSON file

In [83]:
website_pd.to_json("websites.json")

## Save Dictionary Data to MongoDB