In [1]:
import pandas as pd
import numpy as np
import requests 
from bs4 import BeautifulSoup


In [2]:
url="https://en.wikipedia.org/wiki/List_of_countries_by_vehicles_per_capita"
#headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"}
response = requests.get(url)
    
wikisoup = BeautifulSoup(response.content, "html.parser")    
tbl=wikisoup.find("table",attrs={"class":"wikitable sortable"})

Country=list()
Motor_vehicles_per_1k=list()
for row in tbl("tr"):
    cells=row("td")
    if (len(cells)<5):
        continue
    Country.append(cells[1].get_text().strip("\xa0"))
    Motor_vehicles_per_1k.append(cells[2].get_text().strip("\n"))

Motor_vehicles_per_1k[5]=789
df=pd.DataFrame({"Country":Country,"Motor_vehicles_per_1k":Motor_vehicles_per_1k})
df

Unnamed: 0,Country,Motor_vehicles_per_1k
0,San Marino,1263
1,Monaco,899
2,New Zealand,837
3,Iceland,824
4,United States,816
...,...,...
186,Lesotho,4
187,Bangladesh,4
188,Somalia,3
189,Solomon Islands,3


In [3]:
url2="https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)"
response2 = requests.get(url2)
wikisoup2 = BeautifulSoup(response2.content, "html.parser")    

tbl2=wikisoup2.find("table",attrs={"class":"wikitable sortable static-row-numbers mw-datatable"})

df_pop=pd.read_html(str(tbl2))
df_pop=pd.DataFrame(df_pop[0])

df_pop.drop("UN continentalregion[4]",axis='columns', inplace=True)
df_pop.drop("UN statisticalsubregion[4]",axis='columns', inplace=True)
df_pop.drop("Population(1 July 2018)",axis='columns', inplace=True)
df_pop.drop("Change",axis='columns', inplace=True)
df_pop.sort_values("Country/Area",ascending=True, inplace=True)
df_pop=df_pop.rename(columns={'Country/Area':'Country'})
df_pop=df_pop.rename(columns={'Population(1 July 2019)':'Population'})

In [4]:
df_pop.reset_index(drop=True, inplace=True)
df_pop

Unnamed: 0,Country,Population
0,Afghanistan,38041754
1,Albania,2880917
2,Algeria,43053054
3,American Samoa (United States),55312
4,Andorra,77142
...,...,...
229,Western Samoa,197097
230,World,7713468100
231,Yemen,29161922
232,Zambia,17861030


In [5]:
url3="https://www.indexmundi.com/facts/indicators/NV.IND.MANF.CD/rankings"
response3 = requests.get(url3)
manufactury = BeautifulSoup(response3.content, "html.parser")    

tbl3=manufactury.find("table")

df_factory=pd.read_html(str(tbl3))
df_factory=pd.DataFrame(df_factory[0])
df_factory.drop("Rank",axis='columns', inplace=True)
df_factory.drop("Year",axis='columns', inplace=True)
df_factory.sort_values("Country",ascending=True, inplace=True)
df_factory.reset_index(drop=True, inplace=True)
df_factory

Unnamed: 0,Country,Value
0,Afghanistan,2.241796e+09
1,Albania,9.366004e+08
2,Algeria,4.722464e+10
3,Andorra,9.430922e+07
4,Angola,8.036419e+09
...,...,...
184,Venezuela,5.823696e+10
185,Vietnam,3.922565e+10
186,Yemen,3.699414e+09
187,Zambia,2.265478e+09


In [6]:
import sklearn.preprocessing as preprocessing

float_array = df_factory['Value'].values.astype(float).reshape(-1,1)
min_max_scaler = preprocessing.MinMaxScaler()
scaled_array = min_max_scaler.fit_transform(float_array)
df_factory['Value']=scaled_array
df_factory

Unnamed: 0,Country,Value
0,Afghanistan,0.000560
1,Albania,0.000234
2,Algeria,0.011798
3,Andorra,0.000024
4,Angola,0.002008
...,...,...
184,Venezuela,0.014549
185,Vietnam,0.009800
186,Yemen,0.000924
187,Zambia,0.000566


In [7]:
df_count=pd.read_csv (r'resultCountry.csv')
df_count.drop("Unnamed: 0",axis='columns', inplace=True)
df_count.drop("Link_to_Country",axis='columns', inplace=True)
df_count=df_count.merge(df_pop, on='Country', how='left')
df_count=df_count.merge(df, on='Country', how='left')
df_count=df_count.merge(df_factory, on='Country', how='left')
df_count=df_count.rename(columns={'Value':'Manufacturing_value_normalized'})

df_count

Unnamed: 0,Country,Population,Motor_vehicles_per_1k,Manufacturing_value_normalized
0,Afghanistan,38041754.0,47,0.000560
1,Aland Islands,,,
2,Albania,2880917.0,167,0.000234
3,Algeria,43053054.0,153,0.011798
4,Andorra,77142.0,,0.000024
...,...,...,...,...
196,Venezuela,28515829.0,145,0.014549
197,Vietnam,96462106.0,23,0.009800
198,Yemen,29161922.0,37,0.000924
199,Zambia,17861030.0,23,0.000566


In [8]:
df_final=df_count.dropna(thresh=3)
df_final

Unnamed: 0,Country,Population,Motor_vehicles_per_1k,Manufacturing_value_normalized
0,Afghanistan,38041754.0,47,0.000560
2,Albania,2880917.0,167,0.000234
3,Algeria,43053054.0,153,0.011798
4,Andorra,77142.0,,0.000024
5,Angola,31825295.0,32,0.002008
...,...,...,...,...
196,Venezuela,28515829.0,145,0.014549
197,Vietnam,96462106.0,23,0.009800
198,Yemen,29161922.0,37,0.000924
199,Zambia,17861030.0,23,0.000566


In [9]:

df_final["Total_Motor_vehicles"]=None
df_final['Population'] = pd.to_numeric(df_final['Population'], errors='coerce').fillna(1) 
df_final['Motor_vehicles_per_1k'] = pd.to_numeric(df_final['Motor_vehicles_per_1k'], errors='coerce').fillna(1) 
df_final["Total_Motor_vehicles"]= df_final['Population']*df_final['Motor_vehicles_per_1k']*0.001
df_final

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final["Total_Motor_vehicles"]=None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Population'] = pd.to_numeric(df_final['Population'], errors='coerce').fillna(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['Motor_vehicles_per_1k'] = pd.to_numeric(df_final['Motor_vehicles_pe

Unnamed: 0,Country,Population,Motor_vehicles_per_1k,Manufacturing_value_normalized,Total_Motor_vehicles
0,Afghanistan,38041754.0,47.0,0.000560,1787962.438
2,Albania,2880917.0,167.0,0.000234,481113.139
3,Algeria,43053054.0,153.0,0.011798,6587117.262
4,Andorra,77142.0,1.0,0.000024,77.142
5,Angola,31825295.0,32.0,0.002008,1018409.440
...,...,...,...,...,...
196,Venezuela,28515829.0,145.0,0.014549,4134795.205
197,Vietnam,96462106.0,23.0,0.009800,2218628.438
198,Yemen,29161922.0,37.0,0.000924,1078991.114
199,Zambia,17861030.0,23.0,0.000566,410803.690


In [10]:
df_final.to_csv("CountiesByVehicleAndMAnufacturing.csv")

In [11]:
url4="https://en.wikipedia.org/wiki/List_of_countries_by_forest_area_(percentage)"
response4 = requests.get(url4)
    
forest= BeautifulSoup(response4.content, "html.parser")    
tbl4=forest.findAll("table",attrs={"class":"wikitable sortable"})[1]

Country=list()
Forest_percentage=list()
for row in tbl4("tr"):
    cells=row("td")
    if (len(cells)<5):
        continue
    Country.append(cells[1].get_text().strip("\xa0"))
    Forest_percentage.append(cells[4].get_text().strip("%\n"))

Forest_percentage[100]=80.00

In [13]:
df_forest=pd.DataFrame({"Country":Country,"Forest_percentage":Forest_percentage})
df_forest.sort_values("Country",ascending=True, inplace=True)
df_forest.reset_index(drop=True, inplace=True)
df_forest['Forest_percentage'].astype(str).astype(float)

0       0.25
1      26.84
2       1.70
3      34.19
4      47.41
       ...  
185    51.68
186    37.14
187     0.85
188    50.00
189    66.35
Name: Forest_percentage, Length: 190, dtype: float64

In [23]:
url5="https://worldpopulationreview.com/country-rankings/countries-by-density"
response5 = requests.get(url5)
    
density= BeautifulSoup(response5.content, "html.parser")    
tbl5=density.find("table",attrs={"class":"jsx-1487038798 table table-striped tp-table-body"})
Country=list()
Density=list()
for row in tbl4("tr"):
    cells=row("td")
    if (len(cells)<5):
        continue
    Country.append(cells[1].get_text().strip("\xa0"))
    t=cells[2].get_text().strip("%\n")
    t=t.replace(',','')
    Density.append(t)
Density[100]=23117

In [31]:
df_density=pd.DataFrame({"Country":Country,"Population_Density_km2":Density})
df_density.sort_values("Country",ascending=True, inplace=True)
df_density.reset_index(drop=True, inplace=True)
df_density['Population_Density_km2'].astype(str).astype(float)
df_density

Unnamed: 0,Country,Population_Density_km2
0,Afghanistan,1631
1,Albania,7716
2,Algeria,42000
3,Andorra,160
4,Angola,591040
...,...,...
185,Venezuela,471378
186,Vietnam,123000
187,Yemen,4490
188,Zambia,376309


In [28]:
df_density=df_density.merge(df_forest, on='Country', how='left')
df_density

Unnamed: 0,Country,Population_Density_km2,Forest_percentage
0,Afghanistan,1631,00.25
1,Albania,7716,26.84
2,Algeria,42000,01.70
3,Andorra,160,34.19
4,Angola,591040,47.41
...,...,...,...
185,Venezuela,471378,51.68
186,Vietnam,123000,37.14
187,Yemen,4490,00.85
188,Zambia,376309,50.00


In [29]:
df_density.to_csv("Population&Forest_Density.csv")