# SETUP

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from unicodedata import normalize
import re
import pycountry
from countryinfo import CountryInfo
from currencies import Currency
import requests
import json
from bs4 import BeautifulSoup

# Airline and country

In [44]:
url = "https://www.pilotjobsnetwork.com/"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

country_airlines = []

elements = soup.find_all("a", href=lambda href: href and "jobs" in href)
for element in elements :
    text = element.text.strip()
    country_airlines.append(text)

# country_airlines

In [45]:
airlines_by_country = []
for text in country_airlines:
    # Regex pattern to match the country and airline
    apattern = r'^[^-]*-[^-]*-\s*([^(]+)'
    cpattern = r'(\w+)\s\((\w{2})\)'
    

    country_match = re.search(cpattern, text)
    country = country_match.group(1)
    airline_match = re.search(apattern, text)
    airline = airline_match.group(1)

    # Append the airline and country to the list
    airlines_by_country.append((airline, country))

# Create the table
df = pd.DataFrame(airlines_by_country, columns=['Airline', 'Country'])
df['Country'] = df['Country'].replace('UK', 'United Kingdom')
# # Print the table
# df.head(10)

In [46]:
# Define countries standard name (ISO)
# Define a function to map country names to their ISO codes
def map_country_name(name):
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except LookupError:
        return None
# Add a column to the dataframe with the ISO code of the country
df['ISO'] = df['Country'].apply(map_country_name)

In [47]:
df_airline = df

In [48]:
df.to_csv("metadata_ctry_airlines.csv")

# Average gross income by country and Currency code (frozen)

In [49]:
url_3 = "https://www.worlddata.info/average-income.php"
df_3 = pd.read_html(url_3, attrs={'class': 'std100 hover', 'id': 'tabsort'})
df_3 = df_3[0]
df_3['Country'] = df_3['Country'].str.replace(' \*', '', regex=True)

In [50]:
# Define countries standard name (ISO)
# Define a function to map country names to their ISO codes
def map_country_name(name):
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except LookupError:
        return None
# Add a column to the dataframe with the ISO code of the country
df_3['ISO'] = df_3['Country'].apply(map_country_name)

In [51]:
# Define a function to get the ISO code for each country
def get_iso_code(country_name):
    try:
        country = pycountry.countries.search_fuzzy(country_name)[0]
        return country.alpha_3
    except LookupError:
        return None
df_3['ISO_code'] = df_3['ISO'].apply(get_iso_code)

In [52]:
def get_currency(country):
    try:
        currency = CountryInfo(country).currencies()
        return currency
    except LookupError:
        return None
df_3['Currency'] = df_3['ISO'].apply(get_currency)

def first_item_to_string(arr):
    return str(arr[0])
# df_3['Currency'] = df_3['Currency'].apply(first_item_to_string)

In [53]:
df_3.columns = ['Rank', 'Country', 'yearly_income', 'monthly_income', 'ISO', 'ISO_code', 'Currency']

In [54]:
df_3["yearly_income"] = df_3["yearly_income"].str.replace(",", "").str.replace("$", "").astype(float)
df_3["monthly_income"] = df_3["monthly_income"].str.replace(",", "").str.replace("$", "").astype(float)
df_3["Currency"] = df_3["Currency"].str.replace("[',\[\]]", "")

  df_3["yearly_income"] = df_3["yearly_income"].str.replace(",", "").str.replace("$", "").astype(float)
  df_3["monthly_income"] = df_3["monthly_income"].str.replace(",", "").str.replace("$", "").astype(float)
  df_3["Currency"] = df_3["Currency"].str.replace("[',\[\]]", "")


In [55]:
df_income_code_cur = df_3

In [56]:
df_3.to_csv("metadata_ctry_salary_2.csv")

# Currency Exchange rates

In [57]:
df_V1 = pd.read_csv("METADATA_V1.csv")
df_V1

Unnamed: 0,Country,year_income_gross,month_income_gross,ISO,Currency
0,Monaco,186080,15507,Monaco,EUR
1,Bermuda,122470,10206,Bermuda,BMD
2,Switzerland,90600,7550,Switzerland,CHF
3,Luxembourg,88190,7349,Luxembourg,EUR
4,Norway,83880,6990,Norway,NOK
...,...,...,...,...,...
64,Pakistan,1470,123,Pakistan,PKR
65,Nepal,1220,102,Nepal,NPR
66,Timor-Leste,1140,95,Timor-Leste,LCU
67,Ethiopia,940,78,Ethiopia,ETB


In [58]:
df_V1.to_json("ctry_salry.json", orient="records")

In [59]:
payload = {}
headers= {
  "apikey": "pPf6uwtTqHvmPDaWzHFzihcpKRAK4SC5"}
from_usd = "USD"
amount_convert = 1
def convert_usd(code_currency):
    try:
        response = requests.request("GET", f"https://api.apilayer.com/exchangerates_data/convert?to={code_currency}&from={from_usd}&amount={amount_convert}", headers=headers, data = payload)
        return json.loads(response.text)["info"]["rate"]
    except LookupError:
        return None

df_V1['XR'] = df_V1['Currency'].apply(convert_usd)

In [60]:
df_V2 = df_V1[["Currency", "XR"]]
df_V2 = df_V2.drop_duplicates()

In [61]:
df_XR = df_V2

In [62]:
df_V2.to_csv("XR_list.csv")


# Index cost of living (frozen)

In [63]:
url_4 = "https://www.numbeo.com/cost-of-living/rankings_by_country.jsp"
df_4 = pd.read_html(url_4, attrs={'id': 't2'})
df_4 = df_4[0]

In [64]:
# Define a function to map country names to their ISO codes
def map_country_name(name):
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except LookupError:
        return name
# Add a column to the dataframe with the ISO code of the country
df_4['ISO'] = df_4['Country'].apply(map_country_name)

df_4= df_4.replace("Hong Kong (China)", "Hong Kong")
df_4= df_4.replace("Macao (China)", "Macao")
df_4= df_4.replace("Taiwan (China)", "Taiwan")
df_4= df_4.replace("Kosovo (Disputed Territory)", "Kosovo")
df_4 = df_4[['ISO','Cost of Living Index']]
df_4 = df_4.dropna(how='any',axis=0) 

In [65]:
df_index = df_4

In [66]:
df_4.to_csv("cost_of_living_index.csv")

# Average salary income tax (frozen)

In [67]:
df_5 = pd.read_csv("salary_tax.csv")
# Define a function to map country names to their ISO codes
def map_country_name(name):
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except LookupError:
        return name
# Add a column to the dataframe with the ISO code of the country
df_5['ISO'] = df_5['Country'].apply(map_country_name)
df_5["Last"].astype(float)
df_5.columns = ["Country", "Income_tax", "ISO"]

In [68]:
df_tax = df_5

In [69]:
df_5.to_csv("income_tax_updated.csv")

# Inflation rate (frozen)

In [70]:
#Load inflation xls and add ISO countries
df_V3 = pd.read_csv("SQ_pred_database - 7 - Inflation.csv")
df_V3 

# Define a function to get the ISO name for each country
def get_iso_code(name):
    try:
        return pycountry.countries.search_fuzzy(name)[0].name
    except LookupError:
        return None
df_V3['ISO_name'] = df_V3['Country'].apply(get_iso_code)
df_inflation = df_V3
df_inflation

Unnamed: 0,ISO_code,Country,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,ISO_name
0,ABW,Aruba,208,432,063,-237,042,048,-089,-047,358,426,122,074,604,Aruba
1,AFG,Afghanistan,218,118,644,739,467,-066,438,498,063,23,544,506,,Afghanistan
2,AGO,Angola,1448,1348,1028,878,73,916,3238,2984,1963,1708,2102,2385,2135,Angola
3,ALB,Albania,357,341,203,193,162,191,129,199,203,141,162,204,673,Albania
4,ARE,United Arab Emirates,088,088,066,109,234,407,162,197,306,-193,-208,018,522,United Arab Emirates
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,WSM,Samoa,-02,288,619,-021,-124,192,013,13,42,219,-157,-301,875,Samoa
189,YEM,"Yemen, Rep.",1117,1954,989,1097,81,394,5,304,276,10,2618,457,4385,
190,ZAF,South Africa,406,502,572,578,614,451,659,527,45,413,322,461,7,South Africa
191,ZMB,Zambia,85,644,659,699,782,1011,1786,658,749,98,1573,2202,1099,Zambia


In [71]:
df_inflation.to_csv("inlfation_rate_ISO.csv")

# Category

In [72]:
from tqdm.contrib.concurrent import thread_map
pd.set_option('display.max_colwidth', None)

#GET AIRLINES URLS
url = 'https://www.pilotjobsnetwork.com/'
max_urls = 5

def get_airlines_urls(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all('a')
    for link in links:
        href = link.get('href')
        if href and 'jobs/' in href:
            airline_url = url + href
            yield airline_url
##GET INFO FROM PAGES
def get_airline_tables(airline_url):
    try:
        response = requests.get(airline_url)
        # Parse the HTML content of the response using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        # Find all text that contains the phrase "pilot jobs --->"
        matching_text = soup.find_all(text=lambda text: text and "pilot jobs --->" in text)
        # Regular expression to match text between parentheses
        for text in matching_text:
            paren_regex = r'\((.*?)\)'
            categories = []
            for text in matching_text:
                categories += re.findall(paren_regex, text)
            return [airline_url] + categories
    except NameError:
        return airline_url

# MAP IT 
list_category = list(thread_map(get_airline_tables, get_airlines_urls(url)))

#DF AS OUTPUT
df_category = pd.DataFrame([category for category in list_category if type(category) == list])

  matching_text = soup.find_all(text=lambda text: text and "pilot jobs --->" in text)
501it [00:32, 15.37it/s]


In [73]:
# Define the regex pattern
regex_pattern = r'[^/]+(?=/?$)'

# Define a function to apply the regex pattern and replace underscores with spaces
def clean_url(url):
    match = re.search(regex_pattern, url)
    cleaned_url = match.group(0).replace('_', ' ')
    return cleaned_url
df_category[0] = df_category[0].apply(clean_url)
df_category.columns = ["Airline", "Category"]
df_category

Unnamed: 0,Airline,Category
0,City Airlines GmbH,Major/National/Low Cost
1,European Cargo,Cargo
2,BA CityFlyer,Major/National/Low Cost
3,Wizzair (UK Contract),Major/National/Low Cost
4,easyJet (UK Contract),Major/National/Low Cost
...,...,...
495,Air Transat,Charter
496,Ellinair,Regional
497,Titan Airways Ltd,Regional
498,Aeropartner,Fractional/Corporate


In [74]:
df_category.to_csv("category.csv")

# Conso df (to train model)

## Loading PPJN data scraping

In [75]:
df_extract_ppjn = pd.read_csv("PPJN_extract_salary.csv")
df_extract_ppjn.head()

#fill in blank with gross + remove Nan salary + convert salary to numeric
df_extract_ppjn["gross-net"] = df_extract_ppjn["gross-net"].fillna("gross")
df_extract_ppjn = df_extract_ppjn.dropna()
df_extract_ppjn["salary"] = df_extract_ppjn["salary"].str.replace(',', '.')
df_extract_ppjn["salary"] = pd.to_numeric(df_extract_ppjn["salary"])
df_extract_ppjn["Year"] = df_extract_ppjn["Year"].astype(int)

## Mapping with metadata

In [76]:
#Create df with all features
df_conso = df_extract_ppjn[["Airline", "Year", "Top/Base", "salary", "currency", "gross-net"]]

#Add category
cat_map = dict(zip(df_category['Airline'], df_category['Category']))
df_conso["Category"] = df_conso["Airline"].map(cat_map)

#Add country
country_map = dict(zip(df_airline['Airline'], df_airline['ISO']))
df_conso["Country"] = df_conso["Airline"].map(country_map)

#Add income
income_map = dict(zip(df_income_code_cur["ISO"], df_income_code_cur["yearly_income"]))
df_conso["Income"] = df_conso["Country"].map(income_map)

#Add cost of living
index_map = dict(zip(df_index["ISO"], df_index["Cost of Living Index"]))
df_conso["Index"] = df_conso["Country"].map(index_map)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_conso["Category"] = df_conso["Airline"].map(cat_map)


## Standardize salary 

In [77]:
df_inflation = pd.read_csv("inflation_amended.csv")
for x in range(2010, 2023):
    df_inflation[f"{x}"] = df_inflation[f"{x}"].str.replace(',', '.').astype(float)
    df_inflation.fillna(0, inplace=True)
    df_inflation[f"{x}"] = df_inflation[f"{x}"].replace(np.nan,0)
    df_inflation[f"{x}"] = df_inflation[f"{x}"]/100+1
    df_inflation["2023"] = 1
# df_inflation = df_inflation.set_index("ISO_name")

def update_year(year):
    if year < 2010:
        return 2010
    else:
        return year

df_conso['Year'] = df_conso['Year'].apply(update_year)

In [78]:
df_inflation["Cum2010"]= df_inflation["2011"]*df_inflation["2012"]*df_inflation["2013"]*df_inflation["2014"]*df_inflation["2015"]*df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2011"]= df_inflation["2012"]*df_inflation["2013"]*df_inflation["2014"]*df_inflation["2015"]*df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2012"]= df_inflation["2013"]*df_inflation["2014"]*df_inflation["2015"]*df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2013"]= df_inflation["2014"]*df_inflation["2015"]*df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2014"]= df_inflation["2015"]*df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2015"]= df_inflation["2016"]*df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2016"]= df_inflation["2017"]*df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2017"]= df_inflation["2018"]*df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2018"]= df_inflation["2019"]*df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2019"]= df_inflation["2020"]*df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2020"]= df_inflation["2021"]*df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2021"]= df_inflation["2022"]*df_inflation["2023"]
df_inflation["Cum2022"]= df_inflation["2023"]
df_inflation["Cum2023"]= 1



In [79]:
df_inflation

Unnamed: 0,ISO_code,Country,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,ISO_name,2023,Cum2010,Cum2011,Cum2012,Cum2013,Cum2014,Cum2015,Cum2016,Cum2017,Cum2018,Cum2019,Cum2020,Cum2021,Cum2022,Cum2023
0,ABW,Aruba,1.0208,1.0432,1.0063,0.9763,1.0042,1.0048,0.9911,0.9953,1.0358,1.0426,1.0122,1.0074,1.0604,Aruba,1,1.191191,1.141862,1.134714,1.162259,1.157398,1.151869,1.162213,1.167701,1.127342,1.081280,1.068247,1.0604,1,1
1,AFG,Afghanistan,1.0218,1.1180,1.0644,1.0739,1.0467,0.9934,1.0438,1.0498,1.0063,1.0230,1.0544,1.0506,1.0000,Afghanistan,1,1.660453,1.485200,1.395340,1.299320,1.241349,1.249596,1.197161,1.140370,1.133231,1.107753,1.050600,1.0000,1,1
2,AGO,Angola,1.1448,1.1348,1.1028,1.0878,1.0730,1.0916,1.3238,1.2984,1.1963,1.1708,1.2102,1.2385,1.2135,Angola,1,6.981923,6.152558,5.579033,5.128731,4.779805,4.378715,3.307686,2.547509,2.129490,1.818833,1.502920,1.2135,1,1
3,ALB,Albania,1.0357,1.0341,1.0203,1.0193,1.0162,1.0191,1.0129,1.0199,1.0203,1.0141,1.0162,1.0204,1.0673,Albania,1,1.317521,1.274075,1.248726,1.225081,1.205552,1.182957,1.167891,1.145104,1.122321,1.106716,1.089073,1.0673,1,1
4,ARE,United Arab Emirates,1.0088,1.0088,1.0066,1.0109,1.0234,1.0407,1.0162,1.0197,1.0306,0.9807,0.9792,1.0018,1.0522,United Arab Emirates,1,1.181870,1.171561,1.163879,1.151329,1.125004,1.081007,1.063774,1.043223,1.012248,1.032169,1.054094,1.0522,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,WSM,Samoa,0.9980,1.0288,1.0619,0.9979,0.9876,1.0192,1.0013,1.0130,1.0420,1.0219,0.9843,0.9699,1.0875,Samoa,1,1.230483,1.196037,1.126318,1.128688,1.142860,1.121330,1.119874,1.105503,1.060943,1.038206,1.054766,1.0875,1,1
189,YEM,"Yemen, Rep.",1.1117,1.1954,1.0989,1.1097,1.0810,1.3940,1.0500,1.3040,1.2760,1.1000,1.2618,1.4570,1.4385,Yemen,1,11.164407,9.339474,8.498930,7.658763,7.084888,5.082416,4.840396,3.711960,2.909060,2.644600,2.095894,1.4385,1,1
190,ZAF,South Africa,1.0406,1.0502,1.0572,1.0578,1.0614,1.0451,1.0659,1.0527,1.0450,1.0413,1.0322,1.0461,1.0700,South Africa,1,1.837823,1.749975,1.655292,1.564844,1.474321,1.410698,1.323481,1.257225,1.203086,1.155369,1.119327,1.0700,1,1
191,ZMB,Zambia,1.0850,1.0644,1.0659,1.0699,1.0782,1.1011,1.1786,1.0658,1.0749,1.0980,1.1573,1.2202,1.1099,Zambia,1,3.348606,3.146003,2.951499,2.758668,2.558587,2.323664,1.971546,1.849827,1.720930,1.567331,1.354300,1.1099,1,1


In [80]:
# Standardize the salary in USD/GROSS/YEARLY/EST.2023

#rate
xr_dict = dict(zip(df_XR.Currency, df_XR.XR))
df_conso['new_salary_USD'] = df_conso.apply(lambda x: x['salary'] / xr_dict[x['currency']] if x['currency'] != 'USD' else x['salary'], axis=1)
#gross
tax_dict = dict(zip(df_tax.ISO, df_tax.Income_tax))
df_conso['new_salary_USD_GROSS'] = df_conso.apply(lambda x: x['new_salary_USD'] * (1+tax_dict[x['Country']]/100) if x['gross-net'] == 'net' else x['new_salary_USD'], axis=1)
#inflation
def calculate_new_salary(row):
    country = row['Country']
    year = row['Year']
    inflation_rate = df_inflation.loc[df_inflation["ISO_name"] == country, f"Cum{year}"].values
    if len(inflation_rate) == 0:
        # handle case where inflation rate is not found
        return None
    else:
        return row['new_salary_USD_GROSS'] * inflation_rate[0]

        

df_conso["new_salary_USD_GROSS_2023"] = df_conso.apply(calculate_new_salary, axis=1)

KeyError: nan

In [None]:
pd.set_option('display.max_columns', 100)
df_conso

Unnamed: 0,Airline,Year,Top/Base,salary,currency,gross-net,Category,Country,Income,Index,new_salary_USD,new_salary_USD_GROSS,new_salary_USD_GROSS_2023
0,British Airways Mainline,2023,CaptMax,181477.5,GBP,gross,Legacy,United Kingdom,44480.0,61.5,218679.206632,218679.206632,218679.206632
1,British Airways Mainline,2023,CaptMin,87876.0,GBP,gross,Legacy,United Kingdom,44480.0,61.5,105890.008194,105890.008194,105890.008194
2,Cargo Air,2019,CaptMax,48000.0,EUR,net,Cargo,Bulgaria,,40.5,50652.680899,55717.948989,67155.425174
3,Cargo Air,2019,CaptMin,42000.0,EUR,net,Cargo,Bulgaria,,40.5,44321.095786,48753.205365,58760.997027
4,DHL Air UK,2023,CaptMax,172102.0,GBP,gross,Cargo,United Kingdom,44480.0,61.5,207381.790138,207381.790138,207381.790138
...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,Swiss International Air Lines,2015,CaptMin,107000.0,CHF,gross,Legacy,Switzerland,90600.0,114.2,116040.733551,116040.733551,120776.204133
102,Atlantic Airways,2023,CaptMax,86528.0,EUR,gross,Regional,Iceland,63460.0,83.3,91309.899433,91309.899433,91309.899433
103,Atlantic Airways,2023,CaptMin,54824.0,EUR,gross,Regional,Iceland,63460.0,83.3,57853.803700,57853.803700,57853.803700
104,Exxaero,2020,CaptMax,78000.0,EUR,gross,Fractional/Corporate,Netherlands,55200.0,68.6,82310.606460,82310.606460,82310.606460


In [None]:
df_model = df_conso[["Airline", "Top/Base", "Category", "Country", "Income", "Index", "new_salary_USD_GROSS_2023"]]
df_model

Unnamed: 0,Airline,Top/Base,Category,Country,Income,Index,new_salary_USD_GROSS_2023
0,British Airways Mainline,CaptMax,Legacy,United Kingdom,44480.0,61.5,218679.206632
1,British Airways Mainline,CaptMin,Legacy,United Kingdom,44480.0,61.5,105890.008194
2,Cargo Air,CaptMax,Cargo,Bulgaria,,40.5,67155.425174
3,Cargo Air,CaptMin,Cargo,Bulgaria,,40.5,58760.997027
4,DHL Air UK,CaptMax,Cargo,United Kingdom,44480.0,61.5,207381.790138
...,...,...,...,...,...,...,...
101,Swiss International Air Lines,CaptMin,Legacy,Switzerland,90600.0,114.2,120776.204133
102,Atlantic Airways,CaptMax,Regional,Iceland,63460.0,83.3,91309.899433
103,Atlantic Airways,CaptMin,Regional,Iceland,63460.0,83.3,57853.803700
104,Exxaero,CaptMax,Fractional/Corporate,Netherlands,55200.0,68.6,82310.606460


In [None]:
df_model.to_csv("df_model.csv", index=False)