In [38]:
import requests
import re
from bs4 import BeautifulSoup as bs
import pandas as pd
import csv
from time import sleep
import json


def save_json(filename, data):
    """Write ``data`` to ``filename`` as JSON text, replacing any existing file."""
    with open(filename, 'w') as handle:
        handle.write(json.dumps(data))

#### Scraping Data

In [39]:
# Star systems that are each sent to the finder as the search origin (the `s` parameter).
systems = [
    'Sol',
    'Achenar',
    'Coalsack Sector VU-O b6-6',
    'Musca Dark Region PJ-P b6-1',
    'Musca Dark Region IM-V c2-24',
    'Mel 22 Sector GM-V c2-8',
    'T Tauri',
    'Snake Sector OD-S b4-2',
    'Synuefai LX-R d5-28',
    'Synuefe EN-H d11-96',
    'Pencil Sector EL-Y d5',
    'Synuefai EB-R c7-5',
    'Delphi',
    'Pleiades Sector IH-V c2-5',
    'Takurua',
    'Pleiades Sector IH-V c2-7',
    'Pleiades Sector KC-V c2-4',
    'HIP 17692',
    'HR 1185',
    'Synuefe RT-R c20-7',
    'Asterope',
    'HR 1183',
    'Pleiades Sector HR-W d1-41',
    'Celaeno',
    'HIP 17694',
    'HIP 74290',
    'Pleiades Sector KC-V c2-11',
    'Merope',
    'Aditi',
    'Pleiades Sector PD-S b4-0',
    'Pleiades Sector IH-V c2-16',
    'Pleione',
    'Atlas',
    'Maia',
    'Almagest',
    'Taygeta',
    'Ceos',
    'Fehu',
    'Robigo',
    'Sothis',
]

# Query the edtools.cc PvE finder once per reference system and stream every
# result row into systems_scrape.csv (one CSV row per table row, plus its link).
url = 'https://edtools.cc/pve'

# We need the headers here in this website.
# By doing so, the website will think that someone is actually visiting the site using a web browser.
# Ref.: https://stackoverflow.com/questions/61968521/python-web-scraping-request-errormod-security
# Check headers params: https://httpbin.org/headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.71",
}

# newline='' is what the csv module asks for (otherwise blank rows appear on Windows);
# utf-8 matches the default encoding pd.read_csv uses when the file is loaded back.
with open('systems_scrape.csv', 'w', newline='', encoding='utf-8') as csv_file:

    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Distance', 'Source System', 'L-Pad', 'M-pad',
                         'P-pad', 'Fed', 'Imp', 'All', 'Ind', 'Target/Sources',
                         'RES/rings', 'URL'])

    for system in systems:

        params = {
            's': system,
            'md': '250',
            'sc': '2'
        }

        # Request: a failed lookup is reported and skipped so one bad response
        # does not abort the whole scrape.
        try:
            r = requests.get(url, params=params, headers=headers, timeout=30)
            r.raise_for_status()
        except requests.RequestException as exc:
            print(f'Request for {system!r} failed: {exc}')
            continue

        # Pause for as long as the server took to answer, like the other scraping loops do.
        sleep(r.elapsed.total_seconds())

        # Soup: name the parser explicitly so the result does not depend on what bs4 guesses.
        soup = bs(r.content, 'lxml')

        # Check if there is system that meet the conditions
        if soup.find(string='No systems found that meet the conditions'):
            continue

        # Table Data
        table_data = soup.find('table', id='sys_tbl')
        if table_data is None:
            print(f'No result table found for {system!r}; skipping.')
            continue

        # The first <tr> is the header row; every other row becomes one CSV row.
        for tr in table_data.find_all('tr')[1:]:
            cells = tr.find_all('td')
            if not cells:
                continue

            table_body = [td.text.strip() for td in cells]

            # The last link of the 10th cell points to the target's faction page.
            # Fall back to an empty URL rather than reusing the previous row's link.
            links = cells[9].find_all('a') if len(cells) > 9 else []
            unique_factions_url = url + links[-1]['href'] if links else ''

            table_body.append(unique_factions_url)
            csv_writer.writerow(table_body)


#### DataFrame

In [40]:
# Load the scraped rows; thousands=',' makes pandas read comma-grouped figures (e.g. "1,234") as numbers.
df = pd.read_csv('systems_scrape.csv', thousands=',')

In [41]:
# (rows, columns) of the raw scrape.
df.shape

(3483, 12)

In [42]:
# Keep the first scraped row of every source system, order the rows by target
# label (descending) and renumber the index from zero.
dfa = (
    df
    .drop_duplicates(subset=['Source System'])
    .sort_values(by='Target/Sources', ascending=False)
    .reset_index(drop=True)
    .copy()
)
dfa

Unnamed: 0,Distance,Source System,L-Pad,M-pad,P-pad,Fed,Imp,All,Ind,Target/Sources,RES/rings,URL
0,143.24,HIP 117865,26.0,476.0,26.0,,3.0,,4.0,Zhou Wang / 2,No rings,https://edtools.cc/pve?s=Zhou+Wang&a=rt
1,146.10,Dango,,516341.0,2640.0,,4.0,,3.0,Zhou Wang / 2,No rings,https://edtools.cc/pve?s=Zhou+Wang&a=rt
2,193.22,Gliese 4261,,,524.0,1.0,,,3.0,Zarya Manas / 3,5 rings,https://edtools.cc/pve?s=Zarya+Manas&a=rt
3,202.98,Lemayak,,3914.0,1579.0,,,,5.0,Zarya Manas / 3,5 rings,https://edtools.cc/pve?s=Zarya+Manas&a=rt
4,202.12,Adlivun,,654.0,,,,,4.0,Zarya Manas / 3,5 rings,https://edtools.cc/pve?s=Zarya+Manas&a=rt
...,...,...,...,...,...,...,...,...,...,...,...,...
966,147.75,Bacarelia,,351.0,3426.0,4.0,,,3.0,19 Aquilae / 2,2 rings,https://edtools.cc/pve?s=19+Aquilae&a=rt
967,142.15,Col 285 Sector PE-G c11-6,,,299.0,3.0,,,4.0,19 Aquilae / 2,2 rings,https://edtools.cc/pve?s=19+Aquilae&a=rt
968,126.44,Arnaqu,,1536.0,54.0,4.0,,,3.0,114 G. Aquilae / 3,3 rings,https://edtools.cc/pve?s=114+G.+Aquilae&a=rt
969,135.63,Col 285 Sector WT-G a40-1,,,118.0,1.0,,,6.0,114 G. Aquilae / 3,3 rings,https://edtools.cc/pve?s=114+G.+Aquilae&a=rt


#### Filtering

In [43]:
# Keep rows whose `L-Pad` value is below 3000; rows with a missing `L-Pad`
# fail the comparison and are dropped as well.
dfb = dfa.query("`L-Pad` < 3000") \
        .reset_index(drop=True)

# Keep targets whose label ends in "/ 2". Compare the whole number after the
# last slash: testing only the final character would also accept "/ 12" or "/ 22".
source_count = dfb['Target/Sources'].str.rsplit('/', n=1).str[-1].str.strip()
dfb = dfb.loc[source_count == '2'] \
        .copy()
dfb

Unnamed: 0,Distance,Source System,L-Pad,M-pad,P-pad,Fed,Imp,All,Ind,Target/Sources,RES/rings,URL
0,143.24,HIP 117865,26.0,476.0,26.0,,3.0,,4.0,Zhou Wang / 2,No rings,https://edtools.cc/pve?s=Zhou+Wang&a=rt
1,111.14,Hakkaia,74.0,,43.0,1.0,1.0,,5.0,Yu Tun / 2,3 rings,https://edtools.cc/pve?s=Yu+Tun&a=rt
2,97.63,BD-00 3426,108.0,4619.0,108.0,4.0,,,3.0,Yavapai / 2,No rings,https://edtools.cc/pve?s=Yavapai&a=rt
3,100.59,Detta,168.0,1642.0,1649.0,4.0,,,3.0,Yavapai / 2,No rings,https://edtools.cc/pve?s=Yavapai&a=rt
4,149.89,Cadubii,122.0,,31205.0,,3.0,,4.0,Xue Davokje / 2,4 rings,https://edtools.cc/pve?s=Xue+Davokje&a=rt
...,...,...,...,...,...,...,...,...,...,...,...,...
289,85.73,Maheou Ti,252.0,342.0,252.0,3.0,,,4.0,Amahu / 2,No rings,https://edtools.cc/pve?s=Amahu&a=rt
292,168.60,Nu Kuan,439.0,618.0,796.0,1.0,,,6.0,Allo / 2,1 ring,https://edtools.cc/pve?s=Allo&a=rt
293,179.61,Medu,2163.0,,1699.0,3.0,,,3.0,Aka / 2,No rings,https://edtools.cc/pve?s=Aka&a=rt
294,146.03,HIP 47255,423.0,240.0,,3.0,,,4.0,Adnyan / 2,No rings,https://edtools.cc/pve?s=Adnyan&a=rt


In [44]:
# Count how many rows each target label still has in dfb and keep the labels
# that occur exactly twice.
source_rows_per_target = dfb['Target/Sources'].value_counts()

# Name the output columns explicitly: pandas < 2.0 and >= 2.0 label the result of
# value_counts().reset_index() differently, and later cells read dfc['index'].
dfc = (
    source_rows_per_target[source_rows_per_target == 2]
    .rename_axis('index')
    .reset_index(name='Target/Sources')
)
dfc

Unnamed: 0,index,Target/Sources
0,Millese / 2,2
1,San Yax / 2,2
2,Kundinke / 2,2
3,Svantetit / 2,2
4,Slatas / 2,2
5,Belach / 2,2
6,Skappa / 2,2
7,GCRV 7300 / 2,2
8,Biatae / 2,2
9,Findja / 2,2


In [45]:
# Keep only the dfb rows whose target label is listed in dfc, with a fresh index.
is_listed_target = dfb['Target/Sources'].isin(dfc['index'])
dfd = dfb.loc[is_listed_target].reset_index(drop=True)

dfd

Unnamed: 0,Distance,Source System,L-Pad,M-pad,P-pad,Fed,Imp,All,Ind,Target/Sources,RES/rings,URL
0,97.63,BD-00 3426,108.0,4619.0,108.0,4.0,,,3.0,Yavapai / 2,No rings,https://edtools.cc/pve?s=Yavapai&a=rt
1,100.59,Detta,168.0,1642.0,1649.0,4.0,,,3.0,Yavapai / 2,No rings,https://edtools.cc/pve?s=Yavapai&a=rt
2,118.51,36 Persei,484.0,3247.0,837.0,3.0,,,4.0,Wolf 186 / 2,No rings,https://edtools.cc/pve?s=Wolf+186&a=rt
3,112.75,27 Kappa Persei,386.0,501.0,386.0,2.0,,,5.0,Wolf 186 / 2,No rings,https://edtools.cc/pve?s=Wolf+186&a=rt
4,169.60,32 Mu Serpentis,1500.0,,1896.0,,,,7.0,Weleutaheim / 2,1 ring,https://edtools.cc/pve?s=Weleutaheim&a=rt
...,...,...,...,...,...,...,...,...,...,...,...,...
85,143.65,Gluskabiku,231.0,,,4.0,,,3.0,Awngtei / 2,No rings,https://edtools.cc/pve?s=Awngtei&a=rt
86,148.69,Na Chac Og,312.0,423.0,238.0,,,,6.0,Atun / 2,No rings,https://edtools.cc/pve?s=Atun&a=rt
87,152.56,Cava,244.0,,,,,,5.0,Atun / 2,No rings,https://edtools.cc/pve?s=Atun&a=rt
88,183.94,Nugua,59.0,15.0,15.0,1.0,,,6.0,Amitae / 2,No rings,https://edtools.cc/pve?s=Amitae&a=rt


In [46]:
# Visual check only: print the dfd rows of every target label, one block per label.

for group_label, group_rows in dfd.groupby('Target/Sources'):
    heading = f"First 2 entries for {group_label!r}"
    print(heading)
    print("------------------------")
    print(group_rows, end="\n\n")

First 2 entries for 'Amitae / 2'
------------------------
    Distance Source System   L-Pad  M-pad  P-pad  Fed  Imp  All  Ind  \
88    183.94         Nugua    59.0   15.0   15.0  1.0  NaN  NaN  6.0   
89    187.08       Lhanayi  1446.0    NaN  660.0  NaN  NaN  NaN  6.0   

   Target/Sources RES/rings                                   URL  
88     Amitae / 2  No rings  https://edtools.cc/pve?s=Amitae&a=rt  
89     Amitae / 2  No rings  https://edtools.cc/pve?s=Amitae&a=rt  

First 2 entries for 'Atun / 2'
------------------------
    Distance Source System  L-Pad  M-pad  P-pad  Fed  Imp  All  Ind  \
86    148.69    Na Chac Og  312.0  423.0  238.0  NaN  NaN  NaN  6.0   
87    152.56          Cava  244.0    NaN    NaN  NaN  NaN  NaN  5.0   

   Target/Sources RES/rings                                 URL  
86       Atun / 2  No rings  https://edtools.cc/pve?s=Atun&a=rt  
87       Atun / 2  No rings  https://edtools.cc/pve?s=Atun&a=rt  

First 2 entries for 'Awngtei / 2'
-----------------

In [47]:
def check_faction_state(state):
    """Tell whether a faction state text mentions a war or an election.

    The check is a case-insensitive substring match, so "Civil War" is already
    covered by the "war" keyword.

    Args:
        state: Text of a faction state cell. Non-string values are treated as
            "no match".

    Returns:
        bool: True when a keyword occurs in ``state``, otherwise False.
    """
    if not isinstance(state, str):
        return False

    keywords = ('war', 'election')
    lowered = state.lower()
    return any(word in lowered for word in keywords)
    

# Download the page behind the URL of every target label in dfd and collect its
# first table. Resulting shape:
#   {target label: {first cell of a row: [remaining cells of that row]}}
unique_factions_data = {}
count = 0
for target_source, frame in dfd.groupby('Target/Sources'):

    count += 1
    table_body = {}
    try:
        new_r = requests.get(frame['URL'].values[0], headers=headers, timeout=30)
        new_r.raise_for_status()
    except requests.RequestException as e:
        # Skip this target: carrying on would re-parse the previous response
        # (or fail with a NameError on the very first pass).
        print(f'Skipping {target_source!r}: {e}')
    else:
        new_soup = bs(new_r.content, 'lxml')

        faction_table = new_soup.body.table if new_soup.body is not None else None
        table_rows = faction_table.find_all('tr')[1:] if faction_table is not None else []

        # The first <tr> is the header; rows with fewer than two cells carry no data.
        for tr in table_rows:
            cells = [td.text.strip() for td in tr.find_all('td')]
            if len(cells) > 1:
                table_body[cells[0]] = cells[1:]

        # Pause for as long as the server took to answer before the next request.
        sleep(new_r.elapsed.total_seconds())

    if table_body:
        unique_factions_data[target_source] = table_body
    if count % 10 == 0:
        print(f'{count} iterations.')

10 iterations.
20 iterations.
30 iterations.
40 iterations.


In [48]:
# Pretty-print the collected faction tables as indented JSON.
print(json.dumps(unique_factions_data, indent=2))

{
  "Amitae / 2": {
    "1": [
      "The Ken'Tarii Mandate",
      "Independent",
      "Theocracy",
      "Expansion",
      "Lhanayi, Nugua"
    ],
    "2": [
      "Lhanayi General PLC",
      "Independent",
      "Corporate",
      "None",
      "Lhanayi"
    ],
    "3": [
      "Lhanayi Confederacy",
      "Independent",
      "Confederacy",
      "Infrastructure Failure",
      "Lhanayi"
    ],
    "4": [
      "United Lhanayi Defence Force",
      "Independent",
      "Dictatorship",
      "Boom",
      "Lhanayi"
    ],
    "5": [
      "United Wang Dana Free",
      "Federation",
      "Democracy",
      "None",
      "Nugua"
    ],
    "6": [
      "United Nugua Freedom Party",
      "Independent",
      "Dictatorship",
      "None",
      "Nugua"
    ],
    "7": [
      "Nugua Revered Cult",
      "Independent",
      "Theocracy",
      "None",
      "Nugua"
    ],
    "8": [
      "Order of Nugua",
      "Independent",
      "Dictatorship",
      "None",
      "Nugua"
    ]

#### Saving in Json

In [49]:
# Write the collected faction tables to disk so they can be reloaded from the file.
save_json('unique_factions.json', unique_factions_data)

#### Loading Json

In [50]:
# Read the faction tables back from disk, then echo them as indented JSON.
with open('unique_factions.json') as json_file:
    parsed = json.load(json_file)

data = json.dumps(parsed, indent=2)
print(data)

{
  "Amitae / 2": {
    "1": [
      "The Ken'Tarii Mandate",
      "Independent",
      "Theocracy",
      "Expansion",
      "Lhanayi, Nugua"
    ],
    "2": [
      "Lhanayi General PLC",
      "Independent",
      "Corporate",
      "None",
      "Lhanayi"
    ],
    "3": [
      "Lhanayi Confederacy",
      "Independent",
      "Confederacy",
      "Infrastructure Failure",
      "Lhanayi"
    ],
    "4": [
      "United Lhanayi Defence Force",
      "Independent",
      "Dictatorship",
      "Boom",
      "Lhanayi"
    ],
    "5": [
      "United Wang Dana Free",
      "Federation",
      "Democracy",
      "None",
      "Nugua"
    ],
    "6": [
      "United Nugua Freedom Party",
      "Independent",
      "Dictatorship",
      "None",
      "Nugua"
    ],
    "7": [
      "Nugua Revered Cult",
      "Independent",
      "Theocracy",
      "None",
      "Nugua"
    ],
    "8": [
      "Order of Nugua",
      "Independent",
      "Dictatorship",
      "None",
      "Nugua"
    ]

#### DataFrame Analysis

In [51]:
# One row per target label (outer keys of `parsed`), one column per inner key;
# each cell holds that row's list of strings, or is empty where a target has no such key.
df_parsed = pd.DataFrame.from_dict(parsed, orient='index')
df_parsed

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
Amitae / 2,"[The Ken'Tarii Mandate, Independent, Theocracy...","[Lhanayi General PLC, Independent, Corporate, ...","[Lhanayi Confederacy, Independent, Confederacy...","[United Lhanayi Defence Force, Independent, Di...","[United Wang Dana Free, Federation, Democracy,...","[United Nugua Freedom Party, Independent, Dict...","[Nugua Revered Cult, Independent, Theocracy, N...","[Order of Nugua, Independent, Dictatorship, No...","[Nugua Crimson Dynamic Limited, Independent, C...","[Nugua Jet Advanced Partners, Independent, Cor...",,,,
Atun / 2,"[Brazilian League of Pilots, Independent, Conf...","[League of Cava Law Party, Independent, Dictat...","[Cava PLC, Independent, Corporate, None, Cava]","[Movement for Cava Unionists, Independent, Com...","[Beatis Collective of Marlinists, Independent,...","[Liberty Party of Na Chac Og, Independent, Dic...","[Na Chac Og Incorporated, Independent, Corpora...","[Na Chac Og Freedom Party, Independent, Dictat...","[People's Na Chac Og Democrats, Independent, D...",,,,,
Awngtei / 2,"[Gluskabiku Systems, Federation, Corporate, No...","[Hajangerni Blue Vision Limited, Federation, C...","[Leschanatya Future, Federation, Democracy, No...","[Labour of Mehua, Federation, Democracy, War, ...","[Natural Gluskabiku Focus, Independent, Dictat...","[Democrats of Gluskabiku, Independent, Democra...","[Interplanetary Explorations, Independent, Dem...","[Canonn, Independent, Cooperative, None, Jetes]","[Jetes Unionists, Independent, Communism, None...","[Jetes Commodities, Independent, Corporate, No...","[Jetes Netcoms Co, Independent, Corporate, Non...","[Party of Jetes, Independent, Dictatorship, No...",,
BD+37 2416 / 2,"[Sol's Salutis Aeternum, Alliance, Cooperative...","[Sirius Corporation, Independent, Corporate, N...","[Di Jian Alliance Mandate, Alliance, Patronage...","[Di Jian Crimson Life Corp., Independent, Corp...","[Di Jian Dynamic Systems, Independent, Corpora...","[United Di Jian Progressive Party, Alliance, D...","[New Di Jian Constitution Party, Independent, ...","[Perez Ring Brewery, Alliance, Corporate, None...","[LTT 13904 Jet Universal Int, Alliance, Corpor...","[LTT 13904 Independents, Independent, Democrac...","[LTT 13904 Limited, Alliance, Corporate, None,...","[Allied LTT 13904 Constitution Party, Independ...",,
Bagalis / 2,"[The Misfits of the Galaxy, Independent, Democ...","[MCC 858 Organisation, Federation, Corporate, ...","[Labour of Juan, Federation, Democracy, None, ...",[Applied Starport Safety Technologies Corporat...,"[Juan Industries, Independent, Corporate, None...","[Conservatives of Juan, Independent, Dictators...","[Order of Juan, Independent, Dictatorship, Non...","[Autocracy of LTT 16764, Independent, Dictator...","[United Ross 310 Progressive Party, Federation...","[Bumbur Purple Central Limited, Federation, Co...","[LTT 16764 Nationalists, Independent, Dictator...","[LTT 16764 Progressive Party, Federation, Demo...","[Silver Vision Organisation, Independent, Corp...","[Purple Netcoms Industries, Independent, Corpo..."
Belach / 2,"[Brazilian Armada X, Independent, Democracy, P...","[Lem Future Tech, Independent, Corporate, War,...","[Galileo Corporation, Independent, Democracy, ...","[Official Hedeinichs Front, Independent, Dicta...","[Hedeinichs Gold Energy & Co, Federation, Corp...","[United Hedeinichs Left Party, Independent, Co...","[DEFENSIVE TRADERS ALLIANCE, Empire, Patronage...","[Movement for Thethys Liberals, Federation, De...","[Defence Force of Thethys, Independent, Dictat...","[Thethys Crimson Transport Ex, Independent, Co...","[Thethys Blue Major & Co, Federation, Corporat...",,,
Biatae / 2,"[The Fringespace Combine, Independent, Coopera...","[Egovi Union, Independent, Communism, None, Eg...","[Egovi Dominion, Independent, Dictatorship, No...","[Defence Party of Egovi, Independent, Dictator...","[Egovi Jet Universal Solutions, Independent, C...","[Arche Corporation, Empire, Dictatorship, None...","[HIP 91837 Empire Party, Empire, Patronage, No...","[HIP 91837 Power PLC, Independent, Corporate, ...","[Labour of HIP 91837, Independent, Democracy, ...",,,,,
Cariangan / 2,"[HR 692 Natural Industry, Independent, Corpora...","[Tetekhe Inc, Federation, Corporate, Expansion...","[United HR 692 Nationalists, Independent, Dict...","[HR 692 Empire League, Empire, Patronage, None...","[The Vengeful, Federation, Democracy, None, HR...","[HR 692 Future, Federation, Democracy, None, H...","[East India Company, Empire, Corporate, Boom, ...","[Imperial Self Defense Force, Empire, Patronag...","[Bureau of Skeller, Independent, Dictatorship,...","[Republic Party of Skeller, Independent, Commu...","[League of Skeller Nationalists, Empire, Dicta...","[Guardians of Tranquillity, Independent, Dicta...","[Skeller Corporation, Federation, Corporate, B...",
Chakaia / 2,"[Union of Non-Aggressive Pilots, Independent, ...","[Jirani Company, Independent, Corporate, None,...","[Natural Oduduni Law Party, Independent, Dicta...","[Allied Jirani League, Independent, Dictatorsh...","[Kaurareg Silver Public Corp, Federation, Corp...","[Jirani Independents, Federation, Democracy, N...","[Global Defense Initiative, Independent, Dicta...","[Manji Jet Transport Holdings, Independent, Co...","[Values Party of Manji, Independent, Democracy...","[Allied Manji Autocracy, Independent, Dictator...","[Natural Manji Movement, Independent, Dictator...",,,
Chimechilo / 2,"[Verne Partners LLP, Empire, Patronage, None, ...","[LHS 1380 Empire Assembly, Empire, Patronage, ...","[LHS 1380 Liberals, Independent, Democracy, No...","[Bureau of LP 941-16 Front, Empire, Dictatorsh...","[LHS 1380 Ltd, Independent, Corporate, None, L...","[LHS 1380 PLC, Independent, Corporate, None, L...","[Eotienses Citizens' Forum, Empire, Patronage,...","[LTT 982 Party, Empire, Dictatorship, None, LT...","[Official LTT 982 Autocracy, Empire, Dictators...","[LTT 982 State Holdings, Independent, Corporat...",,,,


#### Counting unique factions

In [52]:
# Number of filled cells per target (one per faction row), largest first.
dfa_parsed = (
    df_parsed
    .count(axis='columns')
    .sort_values(ascending=False)
    .reset_index()
)
dfa_parsed

Unnamed: 0,index,0
0,Yavapai / 2,14
1,Bagalis / 2,14
2,Wolf 186 / 2,14
3,LTT 2042 / 2,14
4,HIP 13841 / 2,13
5,Misir / 2,13
6,Millese / 2,13
7,Findja / 2,13
8,Cariangan / 2,13
9,Skappa / 2,12


#### Filtering: greater than or equal to (ge)

In [53]:
# Keep the targets with a count of at least 12, then give both columns readable names.
has_enough_factions = dfa_parsed[0] >= 12
dfb_parsed = dfa_parsed.loc[has_enough_factions].copy()

column_names = {'index': 'Target/Sources', 0: 'Unique Factions'}
dfl = dfb_parsed.rename(columns=column_names)
dfl

Unnamed: 0,Target/Sources,Unique Factions
0,Yavapai / 2,14
1,Bagalis / 2,14
2,Wolf 186 / 2,14
3,LTT 2042 / 2,14
4,HIP 13841 / 2,13
5,Misir / 2,13
6,Millese / 2,13
7,Findja / 2,13
8,Cariangan / 2,13
9,Skappa / 2,12


#### Getting URL from the filtered systems

In [54]:
# One (target label, URL) pair per target, taken from the first matching dfd row.
dfr = (
    dfd[['Target/Sources', 'URL']]
    .drop_duplicates(subset=['Target/Sources'])
    .reset_index(drop=True)
)
dfr

Unnamed: 0,Target/Sources,URL
0,Yavapai / 2,https://edtools.cc/pve?s=Yavapai&a=rt
1,Wolf 186 / 2,https://edtools.cc/pve?s=Wolf+186&a=rt
2,Weleutaheim / 2,https://edtools.cc/pve?s=Weleutaheim&a=rt
3,Wader / 2,https://edtools.cc/pve?s=Wader&a=rt
4,Ugrasin / 2,https://edtools.cc/pve?s=Ugrasin&a=rt
5,Svantetit / 2,https://edtools.cc/pve?s=Svantetit&a=rt
6,Slatas / 2,https://edtools.cc/pve?s=Slatas&a=rt
7,Skappa / 2,https://edtools.cc/pve?s=Skappa&a=rt
8,San Yax / 2,https://edtools.cc/pve?s=San+Yax&a=rt
9,Sairre / 2,https://edtools.cc/pve?s=Sairre&a=rt


In [55]:
# Attach each target's URL to its count (left join keeps every row of dfl),
# drop exact duplicate rows and renumber the index.
df_final = (
    dfl
    .merge(dfr, on="Target/Sources", how="left")
    .drop_duplicates()
    .reset_index(drop=True)
)
df_final

Unnamed: 0,Target/Sources,Unique Factions,URL
0,Yavapai / 2,14,https://edtools.cc/pve?s=Yavapai&a=rt
1,Bagalis / 2,14,https://edtools.cc/pve?s=Bagalis&a=rt
2,Wolf 186 / 2,14,https://edtools.cc/pve?s=Wolf+186&a=rt
3,LTT 2042 / 2,14,https://edtools.cc/pve?s=LTT+2042&a=rt
4,HIP 13841 / 2,13,https://edtools.cc/pve?s=HIP+13841&a=rt
5,Misir / 2,13,https://edtools.cc/pve?s=Misir&a=rt
6,Millese / 2,13,https://edtools.cc/pve?s=Millese&a=rt
7,Findja / 2,13,https://edtools.cc/pve?s=Findja&a=rt
8,Cariangan / 2,13,https://edtools.cc/pve?s=Cariangan&a=rt
9,Skappa / 2,12,https://edtools.cc/pve?s=Skappa&a=rt


#### Saving final data with candidates

In [56]:
# Export the candidate table (index included) as an Excel workbook and as a ';'-separated CSV.
df_final.to_excel('candidates_excel.xlsx')
df_final.to_csv('candidates_csv.csv', sep=';')

#### Checking faction state with inara

In [57]:
# Reload the candidates from the ';'-separated CSV, using its first column as the index.
df_final = pd.read_csv('candidates_csv.csv', sep=';', index_col=0)
df_final

Unnamed: 0,Target/Sources,Unique Factions,URL
0,Yavapai / 2,14,https://edtools.cc/pve?s=Yavapai&a=rt
1,Bagalis / 2,14,https://edtools.cc/pve?s=Bagalis&a=rt
2,Wolf 186 / 2,14,https://edtools.cc/pve?s=Wolf+186&a=rt
3,LTT 2042 / 2,14,https://edtools.cc/pve?s=LTT+2042&a=rt
4,HIP 13841 / 2,13,https://edtools.cc/pve?s=HIP+13841&a=rt
5,Misir / 2,13,https://edtools.cc/pve?s=Misir&a=rt
6,Millese / 2,13,https://edtools.cc/pve?s=Millese&a=rt
7,Findja / 2,13,https://edtools.cc/pve?s=Findja&a=rt
8,Cariangan / 2,13,https://edtools.cc/pve?s=Cariangan&a=rt
9,Skappa / 2,12,https://edtools.cc/pve?s=Skappa&a=rt


In [58]:
# Add two empty columns that the Inara lookup fills in afterwards.
for placeholder_column in ('INARA', 'Factions Updated Time'):
    df_final[placeholder_column] = None

In [59]:
def check_faction_state(state):
    """Tell whether a faction state text mentions a war or an election.

    The check is a case-insensitive substring match, so "Civil War" is already
    covered by the "war" keyword.

    Args:
        state: Text of a faction state cell. Non-string values are treated as
            "no match".

    Returns:
        bool: True when a keyword occurs in ``state``, otherwise False.
    """
    if not isinstance(state, str):
        return False

    keywords = ('war', 'election')
    lowered = state.lower()
    return any(word in lowered for word in keywords)
        

# For every candidate target, look up its source systems on inara.cz and record
#   'INARA'                 - a flag when a scanned table cell mentions war / election
#   'Factions Updated Time' - "<system>: <timestamp>" for every system that shows one
url = 'https://inara.cz/starsystem'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.71",
}

count = 0
# .items() yields the real index labels, which is what .loc expects below.
for df_index, system_data in df_final['Target/Sources'].items():
    count += 1

    # First system named in the last field of every faction row; dict.fromkeys
    # removes duplicates while keeping a stable, first-seen order.
    source_systems = list(dict.fromkeys(
        value[-1].split(',')[0].strip() for value in parsed[system_data].values()
    ))

    fac_list = []
    conflict_found = False
    for system in source_systems:
        # Request: a failed lookup is reported and skipped instead of aborting the run.
        try:
            r = requests.get(url, params={'search': system}, headers=headers, timeout=30)
            r.raise_for_status()
        except requests.RequestException as exc:
            print(f'Skipping {system!r}: {exc}')
            continue

        # Soup
        soup = bs(r.content, 'lxml')

        # Checking factions updated time: the timestamp is read from the element
        # that follows the "Factions updated" label, when both are present.
        fac_updated = soup.find(string='Factions updated')
        if fac_updated:
            timestamp = getattr(fac_updated.next_element, 'string', None)
            if timestamp:
                fac_list.append(system + ': ' + timestamp)

        table_data = soup.body.find('table', class_='tablesorter') if soup.body is not None else None

        # Scan the 4th and 5th cell of every non-header row; one hit is enough
        # for the whole target, so later systems are not re-scanned.
        if table_data is not None and not conflict_found:
            conflict_found = any(
                check_faction_state(td.text.strip())
                for tr in table_data.find_all('tr')[1:]
                for td in tr.find_all('td')[3:5]
            )

        # Pause for as long as the server took to answer before the next request.
        sleep(r.elapsed.total_seconds())

    # Write once per target so every collected timestamp is kept, whether the
    # target resolved to one, two or more systems.
    if fac_list:
        df_final.loc[df_index, 'Factions Updated Time'] = ' | '.join(fac_list)
    if conflict_found:
        df_final.loc[df_index, 'INARA'] = 'war, election or civil war detected'

    if count % 10 == 0:
        print(f'{count} iterations.')

10 iterations.


In [60]:
# Candidates with the 'INARA' and 'Factions Updated Time' columns.
df_final

Unnamed: 0,Target/Sources,Unique Factions,URL,INARA,Factions Updated Time
0,Yavapai / 2,14,https://edtools.cc/pve?s=Yavapai&a=rt,"war, election or civil war detected","BD-00 3426: 04 Oct 2022, 7:05pm | Detta: 05 Oc..."
1,Bagalis / 2,14,https://edtools.cc/pve?s=Bagalis&a=rt,,"Juan: 04 Oct 2022, 1:18am | LTT 16764: 04 Oct ..."
2,Wolf 186 / 2,14,https://edtools.cc/pve?s=Wolf+186&a=rt,"war, election or civil war detected","36 Persei: 04 Oct 2022, 11:07pm | 27 Kappa Per..."
3,LTT 2042 / 2,14,https://edtools.cc/pve?s=LTT+2042&a=rt,"war, election or civil war detected","Sceptrum: 05 Oct 2022, 2:01pm | LTT 2099: 05 O..."
4,HIP 13841 / 2,13,https://edtools.cc/pve?s=HIP+13841&a=rt,"war, election or civil war detected","HIP 12716: 03 Oct 2022, 1:00am | Quechua: 04 O..."
5,Misir / 2,13,https://edtools.cc/pve?s=Misir&a=rt,,"LTT 12058: 05 Oct 2022, 12:50pm | LTT 2974: 05..."
6,Millese / 2,13,https://edtools.cc/pve?s=Millese&a=rt,"war, election or civil war detected","LP 861-12: 05 Oct 2022, 2:08am | Aryak: 05 Oct..."
7,Findja / 2,13,https://edtools.cc/pve?s=Findja&a=rt,"war, election or civil war detected","Phiagre: 04 Oct 2022, 10:21pm | CD-61 6801: 04..."
8,Cariangan / 2,13,https://edtools.cc/pve?s=Cariangan&a=rt,"war, election or civil war detected","HR 692: 05 Oct 2022, 1:59pm | Skeller: 05 Oct ..."
9,Skappa / 2,12,https://edtools.cc/pve?s=Skappa&a=rt,"war, election or civil war detected","LP 349-61: 04 Oct 2022, 8:09pm | LP 350-75: 03..."


In [61]:
# Overwrite both candidate exports with the current table, which now includes the INARA columns.
df_final.to_excel('candidates_excel.xlsx')
df_final.to_csv('candidates_csv.csv', sep=';')