# Web Scrape

#### Step 1: Import libraries

#### Step 2: Create a list with all urls

#### Step 3: Loop through each link:
* Get html
* Build table
* Concat all tables

#### Step 4: Expand rows with compact values

# Import Libraries

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Create list with all URLs

In [4]:
# Values for the site's `day` query parameter.
filterDay = [
    'yesterday',
    'today',
    'tomorrow',
]

# Values for the `tp` query parameter: every 6th hour of the day, as strings.
filterHour = list(map(str, range(0, 24, 6)))

# Departures

In [5]:
# Scrape the departures board: one request per (day, tp) pair. Each board row is expanded
# into one record per flight-number / airline line, because a single row can list several.
departure_columns = ['Destination City', 'Destination Code', 'Departure', 'Flight', 'Airline',
                     'Terminal', 'Status', 'Date', 'Reference Day', 'url']
departure_rows = []  # collected across all pages; the DataFrame is built once at the end
for day in filterDay:
    for hour in filterHour:
        url = f'https://www.santiago-airport.com/scl-departures?day={day}&tp={hour}'
        # Fail fast instead of hanging on a dead connection or parsing an HTTP error page.
        request = requests.get(url, timeout=30)
        request.raise_for_status()
        soup = BeautifulSoup(request.text, 'html.parser')
        # The loop variable already is the reference day; no need to slice it out of the URL.
        reference_day = day
        # The previous check compared against 'Tomorrow' (capitalised), which never matched the
        # lower-case day values, so the selected <option> was always the one used. It stays the
        # primary source; the <option> for this day is only a fallback.
        date_option = (soup.find('option', {'selected': 'selected'})
                       or soup.find('option', {'value': f'?day={day}'}))
        if date_option is None:
            raise ValueError(f'Could not find the date selector on {url}')
        date = date_option.text.split(' ')[0]
        # The first 'flight-row' is skipped (presumably the table header - verify against the page).
        for flight in soup.find_all('div', class_='flight-row')[1:]:
            if flight.find('div', class_='adsense'):  # advertisement row
                continue
            dest_cell = flight.find('div', class_='flight-col flight-col__dest-term')
            destination = dest_cell.find('b').text
            destination_code = dest_cell.find('span').text
            departure = flight.find('div', class_='flight-col flight-col__hour').text.strip()
            flight_number = [part.strip() for part in
                             flight.find('div', class_='flight-col flight-col__flight').text.strip().split('\n')]
            airline = [part.strip() for part in
                       flight.find('div', class_='flight-col flight-col__airline').text.strip().split('\n')]
            terminal = flight.find('div', class_='flight-col flight-col__terminal').text.strip()
            status = flight.find_all('a')[-1].text
            # Pad the shorter list with '' so every flight number / airline still gets a record.
            for i in range(max(len(flight_number), len(airline))):
                departure_rows.append({
                    'Destination City': destination,
                    'Destination Code': destination_code,
                    'Departure': departure,
                    'Flight': flight_number[i] if i < len(flight_number) else '',
                    'Airline': airline[i] if i < len(airline) else '',
                    'Terminal': terminal,
                    'Status': status,
                    'Date': date,
                    'Reference Day': reference_day,
                    'url': url
                })
# Passing `columns` keeps the expected schema even when nothing was scraped.
df_departures = pd.DataFrame(departure_rows, columns=departure_columns)

# Arrivals

In [6]:
# Scrape the arrivals board: one request per (day, tp) pair. Each board row is expanded
# into one record per flight-number / airline line, because a single row can list several.
arrival_columns = ['Origin City', 'Origin Code', 'Arrival', 'Flight', 'Airline',
                   'Terminal', 'Status', 'Date', 'Reference Day', 'url']
arrival_rows = []  # collected across all pages; the DataFrame is built once at the end

for day in filterDay:
    for hour in filterHour:
        url = f'https://www.santiago-airport.com/scl-arrivals?day={day}&tp={hour}'
        # Fail fast instead of hanging on a dead connection or parsing an HTTP error page.
        request = requests.get(url, timeout=30)
        request.raise_for_status()
        soup = BeautifulSoup(request.text, 'html.parser')
        # The loop variable already is the reference day; no need to slice it out of the URL.
        reference_day = day
        # The previous check compared against 'Tomorrow' (capitalised), which never matched the
        # lower-case day values, so the selected <option> was always the one used. It stays the
        # primary source; the <option> for this day is only a fallback.
        date_option = (soup.find('option', {'selected': 'selected'})
                       or soup.find('option', {'value': f'?day={day}'}))
        if date_option is None:
            raise ValueError(f'Could not find the date selector on {url}')
        date = date_option.text.split(' ')[0]
        # The first 'flight-row' is skipped (presumably the table header - verify against the page).
        for flight in soup.find_all('div', class_='flight-row')[1:]:
            if flight.find('div', class_='adsense'):  # advertisement row
                continue
            origin_cell = flight.find('div', class_='flight-col flight-col__dest-term')
            origin = origin_cell.find('b').text
            origin_code = origin_cell.find('span').text
            arrival = flight.find('div', class_='flight-col flight-col__hour').text.strip()
            flight_number = [part.strip() for part in
                             flight.find('div', class_='flight-col flight-col__flight').text.strip().split('\n')]
            airline = [part.strip() for part in
                       flight.find('div', class_='flight-col flight-col__airline').text.strip().split('\n')]
            terminal = flight.find('div', class_='flight-col flight-col__terminal').text.strip()
            status = flight.find_all('a')[-1].text
            # Pad the shorter list with '' so every flight number / airline still gets a record.
            for i in range(max(len(flight_number), len(airline))):
                arrival_rows.append({
                    'Origin City': origin,
                    'Origin Code': origin_code,
                    'Arrival': arrival,
                    'Flight': flight_number[i] if i < len(flight_number) else '',
                    'Airline': airline[i] if i < len(airline) else '',
                    'Terminal': terminal,
                    'Status': status,
                    'Date': date,
                    'Reference Day': reference_day,
                    'url': url
                })
# Passing `columns` keeps the expected schema even when nothing was scraped.
df_arrivals = pd.DataFrame(arrival_rows, columns=arrival_columns)

In [7]:
# Join the scraped date and the HH:MM board time with '-' and parse the result as a timestamp.
df_departures['Date_Time'] = pd.to_datetime(
    df_departures['Date'].str.cat(df_departures['Departure'], sep="-"),
    format='%Y-%m-%d-%H:%M',
)
df_arrivals['Date_Time'] = pd.to_datetime(
    df_arrivals['Date'].str.cat(df_arrivals['Arrival'], sep="-"),
    format='%Y-%m-%d-%H:%M',
)

# Add Columns

In [8]:
# City name -> country lookup, applied to 'Destination City' / 'Origin City' via Series.map.
# Keys must match the scraped city text exactly; a city missing here yields NaN in the
# derived country column.
city_country = {
    'Antofagasta': 'Chile',
    'Lima': 'Peru',
    'Buenos Aires': 'Argentina',
    'Sao Paulo': 'Brazil',
    'Calama': 'Chile',
    'Concepcion': 'Chile',
    'Iquique': 'Chile',
    'Puerto Montt': 'Chile',
    'Bogota': 'Colombia',
    'Temuco': 'Chile',
    'La Serena': 'Chile',
    'Madrid': 'Spain',
    'Copiapo': 'Chile',
    'Miami': 'United States',
    'Punta Arenas': 'Chile',
    'Panama City': 'Panama',
    'Rio De Janeiro': 'Brazil',
    'Arica': 'Chile',
    'Auckland': 'New Zealand',
    'New York': 'United States',
    'Montevideo': 'Uruguay',
    'Mendoza': 'Argentina',
    'Atlanta': 'United States',
    'Valdivia': 'Chile',
    'Osorno': 'Chile',
    'Paris': 'France',
    'Cordoba': 'Argentina',
    'Houston': 'United States',
    'Los Angeles': 'United States',
    'Balmaceda': 'Chile',
    'Porto Alegre': 'Brazil',
    'Castro': 'Chile',
    'London': 'United Kingdom',
    'Asuncion': 'Paraguay',
    'Mexico City': 'Mexico',
    'Guayaquil': 'Ecuador',
    'Curitiba': 'Brazil',
    'Easter Island': 'Chile',
    'Florianopolis': 'Brazil',
    'Sydney': 'Australia',
    'Cali': 'Colombia',
    'San Juan': 'Argentina',
    'Toronto': 'Canada',
    'Santa Cruz': 'Bolivia',
    'Iguassu Falls': 'Brazil',
    'Dallas': 'United States',
    'Quito': 'Ecuador',
    'Trujillo': 'Peru'
}


In [9]:
# Derive each flight's country from its city name using the city_country lookup.
df_departures['Destination Country'] = df_departures.loc[:, 'Destination City'].map(city_country)
df_arrivals['Origin Country'] = df_arrivals.loc[:, 'Origin City'].map(city_country)

In [10]:
# Airline name -> parent company lookup, applied to the 'Airline' column via Series.map.
# Keys are matched exactly (case-sensitive), which is why several spellings of the same
# brand (e.g. 'JetSMART', 'Jetsmart Airlines') appear as separate entries.
# NOTE(review): ownership values are hand-maintained - verify before relying on them.
Airline_Parent_Company = {
    'Copa Airlines': 'Copa Holdings',
    'LATAM Airlines': 'LATAM Airlines Group',
    'Delta Air Lines': 'Delta Air Lines Inc.',
    'Qantas': 'Qantas Airways Limited',
    'Cathay Pacific': 'Swire Pacific Limited',
    'Malaysia Airlines': 'Malaysia Aviation Group',
    'Avianca': 'Avianca Holdings S.A.',
    'Air Canada': 'Air Canada',
    'Atlas Air': 'Atlas Air Worldwide Holdings',
    'Sky Airline': 'Sky Airline S.A.',
    'JetSMART': 'Indigo Partners',
    'British Airways': 'International Airlines Group',
    'LATAM Cargo Chile': 'LATAM Airlines Group',
    'DHL Aero Expreso': 'DHL Aviation (Panama) S.A.',
    'Aerolineas Argentinas': 'Argentine government',
    'Iberia': 'International Airlines Group',
    'KLM Royal Dutch Airlines': 'Air France-KLM',
    'El Al Israel Airlines': 'Kanaf Arkia Holdings Ltd',
    'Qatar Airways': 'Qatar Airways Group',
    'Turkish Airlines': 'Turkish Airlines',
    'Air France': 'Air France-KLM',
    'Aeromexico': 'Delta Air Lines Inc.',
    'ITA Airways': 'Italian government',
    'United Airlines': 'United Airlines Holdings',
    'ANA All Nippon Airways': 'ANA Holdings Inc.',
    'American Airlines': 'American Airlines Group',
    'Air Europa': 'Globalia',
    'Korean Air': 'Hanjin Group',
    'JAL Japan Airlines': 'Japan Airlines Co. Ltd.',
    'LATAM Cargo Brasil': 'LATAM Airlines Group',
    'JetSMART Airlines Peru': 'Indigo Partners',
    'Jetsmart Airlines': 'Indigo Partners',
    'Ethiopian Airlines': 'Ethiopian government',
    'Martinair': 'Air France-KLM'
}


In [11]:
# Attach the parent company of each airline using the Airline_Parent_Company lookup.
df_departures['Parent Company'] = df_departures.loc[:, 'Airline'].map(Airline_Parent_Company)
df_arrivals['Parent Company'] = df_arrivals.loc[:, 'Airline'].map(Airline_Parent_Company)

In [12]:
# Same Date_Time computation as the cell that follows the scrape; re-running it is harmless.
df_departures['Date_Time'] = pd.to_datetime(
    df_departures['Date'].str.cat(df_departures['Departure'], sep="-"),
    format='%Y-%m-%d-%H:%M',
)
df_arrivals['Date_Time'] = pd.to_datetime(
    df_arrivals['Date'].str.cat(df_arrivals['Arrival'], sep="-"),
    format='%Y-%m-%d-%H:%M',
)

In [None]:
# save csv
# df_departures.to_csv('departures.csv', index=False)
# df_arrivals.to_csv('arrivals.csv', index=False)

In [None]:
# Bar chart of departures per destination city. The scraped column is 'Destination City';
# there is no 'Destination' column on df_departures.
df_departures['Destination City'].value_counts().plot(kind='bar', title='Destination Counts', figsize=(15,5), fontsize=12, color='green')

# %%

In [None]:
# Bar chart: number of departures per destination country.
df_departures['Destination Country'].value_counts().plot.bar(
    title='Destination Counts', figsize=(15, 5), fontsize=12, color='purple'
)

In [None]:
# Bar chart: number of arrivals per origin country.
df_arrivals['Origin Country'].value_counts().plot.bar(
    title='Origin Counts', figsize=(15, 5), fontsize=12, color='purple'
)

In [None]:
# domestic_flights is a count; international_flights is the filtered DataFrame itself.
domestic_flights = df_departures['Destination Country'].value_counts().loc['Chile']
international_flights = df_departures.loc[df_departures['Destination Country'] != 'Chile']


In [None]:
import matplotlib.pyplot as plt

# Flights per airline, restricted to departures whose destination country is not Chile.
df_departures.loc[df_departures['Destination Country'] != 'Chile', 'Airline'].value_counts().plot.bar(
    title='Airline Counts (Non-Chile)'
)
plt.show()

# %%

In [None]:
# Denominator shared by both percentages: number of departures with a non-null airline.
all_airline_flights = df_departures['Airline'].value_counts().sum()

# International (not chile): airline with the most departures and its share of all departures.
intl_top = df_departures.loc[df_departures['Destination Country'] != 'Chile', 'Airline'].value_counts().nlargest(1)
airline_leading_name = intl_top.index[0]
airline_leading_amount = intl_top.values[0]
airline_leading_percentage = round(airline_leading_amount / all_airline_flights * 100)

# Chile: same figures for departures to Chilean destinations.
chile_top = df_departures.loc[df_departures['Destination Country'] == 'Chile', 'Airline'].value_counts().nlargest(1)
airline_leading_name_chile = chile_top.index[0]
airline_leading_amount_chile = chile_top.values[0]
airline_leading_percentage_chile = round(airline_leading_amount_chile / all_airline_flights * 100)

In [None]:
# Leading international airline's departures as a whole-number percentage of all departures.
round(airline_leading_amount / df_departures['Airline'].count() * 100)

In [None]:
# Only a cell's last expression is displayed, so show both values together as a tuple
# (previously the percentage on the first line was silently discarded).
airline_leading_percentage, airline_leading_amount

In [None]:
# Departures per destination city (all cities, most frequent first).
# The scraped column is 'Destination City'; there is no 'Destination' column.

df_departures['Destination City'].value_counts().plot(kind='bar', title='Destinations', figsize=(10,5), fontsize=12, rot=0, color='green', alpha=0.8)

In [None]:
# Names of the four most frequent destination cities (column is 'Destination City').
df_departures['Destination City'].value_counts().nlargest(4).index.tolist()

In [None]:
import matplotlib.pyplot as plt

# get the value counts for the destinations (the scraped column is 'Destination City')
value_counts = df_departures['Destination City'].value_counts()

# create a mask for the top 5 destinations; method='first' breaks ties so exactly 5 are flagged
mask = value_counts.rank(ascending=False, method='first') <= 5

# plot the bar chart, highlighting the top 5 in yellow
plt.figure(figsize=(10,5))
plt.bar(x=value_counts.index, height=value_counts.values, color=['yellow' if m else 'green' for m in mask], alpha=0.8)
plt.xticks(rotation=0, fontsize=12)
plt.title('Destinations', fontsize=14)
plt.show()


In [None]:
# Inspect flight number, board time and reference day side by side.
df_departures.loc[:, ['Flight', 'Departure', 'Reference Day']]

In [None]:
# Column overview of the departures table. The city column is 'Destination City', and
# 'Departure_Time' is derived on the fly here because the cell that stores it on
# df_departures only comes later in the notebook.
df_departures.assign(
    Departure_Time=pd.to_datetime(df_departures['Departure'], format='%H:%M')
)[[
    'Destination City', 'Destination Code', 'Departure', 'Flight', 'Airline',
    'Terminal', 'Status', 'Date', 'Reference Day',
    'Destination Country', 'Departure_Time']
]

In [None]:
# Summary statistics for the arrivals table.
df_arrivals.describe()

In [None]:
# Parse the HH:MM board time on its own (no date component is supplied to the parser).
df_departures['Departure_Time'] = pd.to_datetime(df_departures.loc[:, 'Departure'], format='%H:%M')

In [None]:
# Number of flights for each distinct departure time.
df_grouped = (
    df_departures
    .groupby('Departure_Time')
    .size()
    .reset_index(name='Count')
)

# Line plot of the counts over departure time, with labelled axes.
plt.plot(df_grouped.Departure_Time, df_grouped.Count, color='blue', linewidth=2)
plt.xlabel('Departure Time')
plt.ylabel('Number of Flights')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# create a dictionary to map the reference day to a color
color_map = {'yesterday': 'blue', 'today': 'green', 'tomorrow': 'red'}

# scatter of departure time vs. row position, one colour per reference day
fig, ax = plt.subplots()
for day, group in df_departures.groupby('Reference Day'):
    ax.scatter(group['Departure'], group.index, label=day, color=color_map[day], alpha=0.5)

# set the x-axis label; rotate the existing tick labels instead of overwriting them
# (set_xticklabels without fixed tick positions can mislabel the axis)
ax.set_xlabel('Departure Time')
ax.tick_params(axis='x', labelrotation=90)

# set the y-axis label and tick labels (this used an undefined `df`; the data is df_departures)
ax.set_ylabel('Flight Count')
ax.set_yticks(df_departures.index)
ax.set_yticklabels(df_departures['Flight'])

# add a legend
ax.legend()

# show the plot
plt.show()


In [None]:
# Airline and status of every row whose status text is exactly 'Landed - Delayed [+]'.
df_departures.loc[df_departures['Status'] == 'Landed - Delayed [+]', ['Airline', 'Status']]

In [None]:
import pandas as pd
from scipy.stats import chi2_contingency

# Airline x Status frequency table
contingency_table = pd.crosstab(index=df_departures['Airline'], columns=df_departures['Status'])

# Chi-squared test of independence between airline and status
chi2_statistic, p_value, dof, expected_freq = chi2_contingency(contingency_table)

# Report the test results
print(f'Chi-squared statistic: {chi2_statistic:.2f}')
print(f'Degrees of freedom: {dof}')
print(f'P-value: {p_value:.4f}')


In [None]:
import numpy as np

# Calculate Cramer's V coefficient
def cramers_v(confusion_matrix):
    """Return Cramer's V for a contingency table, with a small-sample correction.

    The chi-squared statistic is computed from ``confusion_matrix`` itself, so the
    result depends only on the argument (previously a global ``chi2_statistic`` from
    another cell was read, which tied the function to whichever table was tested last).

    Parameters
    ----------
    confusion_matrix : 2-D array-like (ndarray or DataFrame) of observed counts.

    Returns
    -------
    float
        The corrected coefficient, or NaN when it is undefined (for example a table
        with a single row or column).
    """
    from scipy.stats import chi2_contingency  # local import keeps the function self-contained

    observed = np.asarray(confusion_matrix)
    chi2 = chi2_contingency(observed)[0]
    n = observed.sum()
    phi2 = chi2 / n
    r, k = observed.shape
    # Correct phi^2 and both table dimensions for sample size.
    phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))
    rcorr = r - ((r-1)**2)/(n-1)
    kcorr = k - ((k-1)**2)/(n-1)
    denominator = min((kcorr-1), (rcorr-1))
    if denominator <= 0:
        # Avoid 0/0; the coefficient has no meaning for a degenerate table.
        return float('nan')
    return np.sqrt(phi2corr / denominator)

# Cramer's V for the Airline x Status table, printed with two decimals.
cramers_v_coef = cramers_v(contingency_table.values)

print(f"Cramer's V coefficient: {cramers_v_coef:.2f}")


In [None]:
# The scraped column is 'Destination City' (there is no 'Destination' column).
df_departures['Destination City'].value_counts()#[df_departures['Destination City'] == 'Panama City']
df_departures['Destination City'][df_departures['Destination City'] == 'Rio De Janeiro']

In [None]:
# Departures to Sao Paulo or Panama City (the scraped column is 'Destination City').
df_departures[(df_departures['Destination City'].isin(['Sao Paulo', 'Panama City']))]

In [None]:
# Display the departures table for inspection.
df_departures

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# LinearRegression needs numeric inputs: raw country strings and datetime targets cannot be
# fitted. Rows without a mapped country are dropped, the country is one-hot encoded, and the
# departure time is expressed as fractional hours of the day.
reg_data = df_departures.dropna(subset=['Destination Country', 'Departure_Time'])
departure_hour = reg_data['Departure_Time'].dt.hour + reg_data['Departure_Time'].dt.minute / 60

# Create a scatter plot of the data
sns.scatterplot(x=reg_data['Destination Country'], y=departure_hour)

# Fit a linear regression model to the data
model = LinearRegression()
X = pd.get_dummies(reg_data['Destination Country'], dtype=float)
y = departure_hour
model.fit(X, y)

# Get the coefficients and intercept; `slope` holds one coefficient per country column of X
slope = model.coef_
intercept = model.intercept_

# Fitted value per country: with one-hot features the prediction is intercept + that
# country's coefficient. Drawn as markers because the x-axis is categorical.
x_values = X.columns.to_numpy()
y_values = intercept + slope
plt.plot(x_values, y_values, color='red', marker='o', linestyle='none')

# Add labels and title to the plot
plt.xlabel('Destination Country')
plt.ylabel('Departure Time')
plt.title('Linear Regression of Destination vs. Departure Time')

# Show the plot
plt.show()


In [None]:
# Summary statistics for the departures table.
df_departures.describe()

## Descriptive statistics

In [None]:
# Number of distinct airlines among the departures; stored, then displayed.
airlines_amount = df_departures['Airline'].nunique()

airlines_amount

In [None]:
# NOTE(review): hard-coded figures with no visible source - presumably flights per airline;
# confirm and replace with values computed from df_departures.
600 / 36

In [None]:
# Six airlines as a whole-number percentage of the distinct airline count.
round(6 / airlines_amount * 100)

In [None]:
# The six airlines with the most rows, then the number of distinct flight numbers they operate.
top_6_airlines_list = df_departures['Airline'].value_counts().index[:6].tolist()
top_6_airlines_flight_amount = df_departures.loc[
    df_departures['Airline'].isin(top_6_airlines_list), 'Flight'
].nunique()
top_6_airlines_flight_amount

In [None]:
# The assignment had no right-hand side (a SyntaxError). Use the distinct-flight count of the
# top six airlines computed in the previous cell, which is the value the following cell
# assigns to this same name.
sum_flight_amount_top6_airlines = top_6_airlines_flight_amount

In [None]:
# Same computation as above with the cut-off held in a variable.
top_6_airlines_amount = 6
top_6_airlines_list = df_departures['Airline'].value_counts().head(top_6_airlines_amount).index.tolist()
sum_flight_amount_top6_airlines = df_departures.loc[
    df_departures['Airline'].isin(top_6_airlines_list), 'Flight'
].nunique()
sum_flight_amount_top6_airlines

In [None]:
# `top_airlines_amount` is never defined; the cut-off set in the previous cell is
# `top_6_airlines_amount`.
df_departures.Airline.value_counts().index[:top_6_airlines_amount].tolist()

In [None]:
# Total number of flight rows, summed over the per-airline counts.
flights_per_airline = df_departures.groupby('Airline')['Flight'].count()
flights_per_airline.sum()

In [None]:
# Flight rows operated by the two largest airlines combined.
top2_flights_per_airline = df_departures.groupby('Airline')['Flight'].count().nlargest(2)
top2_flights_per_airline.sum()

In [None]:
# NOTE(review): hard-coded counts - presumably the top-2 airlines' flights over all flights,
# as a percentage (cf. the two preceding cells); confirm and compute from the data instead.
412 / 774 * 100

In [None]:

# `alt` was used without ever being imported anywhere in the notebook.
import altair as alt

# Create a sample dataframe
data = {'Airlines': ['Delta', 'American Airlines', 'United Airlines', 'Delta', 'Southwest', 'American Airlines', 'Delta'],
        'Flights': ['DL101', 'AA205', 'UA302', 'DL303', 'SW205', 'AA303', 'DL401']}
df = pd.DataFrame(data)

# Get the top 2 most frequent airlines
top_airlines = df['Airlines'].value_counts().nlargest(2).index.tolist()

# Filter the dataframe for only the top 2 airlines
df_top_airlines = df[df['Airlines'].isin(top_airlines)]

# Create a bar chart using Altair: one bar per airline, longest first
chart = alt.Chart(df_top_airlines).mark_bar().encode(
    y=alt.Y('Airlines:N', sort='-x'),
    x=alt.X('count(Flights):Q', title='Number of Flights')
)

# Set the chart options
chart = chart.properties(
    width=700,
    height=400
)

# Render the chart using Streamlit
# st.altair_chart(chart, use_container_width=True)

chart.show()


# DATA DISCOVERING INSIGHTS

In [None]:
# Display the departures table as the starting point for the plots below.
df_departures

# Simple Plot 1: FLIGHTS PER HOUR

x-axis: only the hours
y-axis: number of flights

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hour of day (0-23) taken from the full departure timestamp.
df_departures['Hour'] = df_departures['Date_Time'].dt.hour

# Flights per hour of day.
hourly_flights = df_departures.groupby('Hour')['Flight'].count()

# Bar chart with one tick per hour present in the data.
plt.bar(hourly_flights.index, hourly_flights.values)
plt.xticks(hourly_flights.index)
plt.xlabel('Hour of the Day')
plt.ylabel('Number of flights')
plt.show()


In [None]:
# Recompute the hour column, then summarise its distribution.
df_departures['Hour'] = df_departures.Date_Time.dt.hour

df_departures.Hour.describe()

In [None]:
# Most frequent departure hour (first one if several tie).
df_departures.Hour.mode().iloc[0]

In [None]:
# Median departure hour.
df_departures.Hour.median()

In [None]:
# Mean departure hour.
df_departures.Hour.mean()

In [None]:
# Skewness of the departure-hour distribution.
df_departures.Hour.skew()

In [None]:
# Kurtosis of the departure-hour distribution (`kurt` is the same method as `kurtosis`).
df_departures.Hour.kurt()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hour of day from the departure timestamp.
df_departures['Hour'] = df_departures['Date_Time'].dt.hour

# Bucket hours into left-closed 6-hour windows: [0,6), [6,12), [12,18), [18,24).
df_departures['Interval'] = pd.cut(
    df_departures['Hour'],
    bins=range(0, 25, 6),
    right=False,
    labels=['0-6', '6-12', '12-18', '18-24'],
)

# Flights per 6-hour window.
interval_flights = df_departures.groupby('Interval')['Flight'].count()

# Bar chart with one tick per window.
plt.bar(interval_flights.index, interval_flights.values)
plt.xticks(interval_flights.index)
plt.xlabel('Time Interval (hours)')
plt.ylabel('Number of flights')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hour of day from the departure timestamp.
df_departures['Hour'] = df_departures['Date_Time'].dt.hour

# Bucket hours into left-closed 12-hour windows: [0,12) and [12,24).
# This overwrites the 'Interval' column with the coarser buckets.
df_departures['Interval'] = pd.cut(
    df_departures['Hour'],
    bins=range(0, 25, 12),
    right=False,
    labels=['0-12', '12-24'],
)

# Flights per 12-hour window.
interval_flights = df_departures.groupby('Interval')['Flight'].count()

# Bar chart with one tick per window.
plt.bar(interval_flights.index, interval_flights.values)
plt.xticks(interval_flights.index)
plt.xlabel('Time Interval (hours)')
plt.ylabel('Number of flights')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hour of day from the arrival timestamp.
df_arrivals['Hour'] = df_arrivals['Date_Time'].dt.hour

# Bucket hours into left-closed 12-hour windows: [0,12) and [12,24).
df_arrivals['Interval'] = pd.cut(
    df_arrivals['Hour'],
    bins=range(0, 25, 12),
    right=False,
    labels=['0-12', '12-24'],
)

# Arriving flights per 12-hour window.
interval_flights = df_arrivals.groupby('Interval')['Flight'].count()

# Bar chart with one tick per window.
plt.bar(interval_flights.index, interval_flights.values)
plt.xticks(interval_flights.index)
plt.xlabel('Time Interval (hours)')
plt.ylabel('Number of flights')
plt.show()


# SIMPLE PLOT: FLIGHTS PER DESTINATION

In [None]:
# Flight number with its destination country and city (the city column is 'Destination City').
df_departures[['Flight','Destination Country','Destination City']]


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Flights per destination country, most frequent first.
dest_counts = df_departures['Destination Country'].value_counts()

# Horizontal bar plot: counts on x, country names on y.
sns.barplot(x=dest_counts.to_numpy(), y=dest_counts.index)

# Title and axis labels.
plt.xlabel('Number of Flights')
plt.ylabel('Destination')
plt.title('Number of Flights per Destination')

plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Count the number of flights per destination city
# (the scraped column is 'Destination City'; there is no 'Destination' column)
dest_counts = df_departures['Destination City'].value_counts()

# Create a bar plot using Seaborn
sns.barplot(x=dest_counts.values, y=dest_counts.index)

# Set the plot title and axis labels
plt.title('Number of Flights per Destination')
plt.xlabel('Number of Flights')
plt.ylabel('Destination')

# Show the plot
plt.show()


In [None]:
# 12-hour clock labels for the 24 hours of the day: '12:00 am', '1:00 am', ..., '11:00 pm'.
[f"{clock_hour}:00 {suffix}" for suffix in ('am', 'pm') for clock_hour in (12, *range(1, 12))]


In [None]:
# Summary statistics for the departures table, now including the derived columns.
df_departures.describe()

In [None]:
# Destination city next to the full departure timestamp.
df_departures.loc[:, ['Destination City', 'Date_Time']]

In [None]:
# Per hour: number of rows with a known destination country.
df_departures.groupby('Hour')[['Destination Country']].count().reset_index()

In [None]:
# Per-hour count of rows with a known destination country, plotted with a fitted trend line.
df_ = df_departures.groupby('Hour')[['Destination Country']].count().reset_index()
sns.lmplot(data=df_, x='Hour', y='Destination Country', height=7)

In [None]:
# Per-hour count of rows with a destination city, plotted with a fitted trend line.
df_ = df_departures.groupby('Hour')[['Destination City']].count().reset_index()
sns.lmplot(data=df_, x='Hour', y='Destination City', height=7)

In [None]:
# `alt` was used without ever being imported anywhere in the notebook.
import altair as alt

# hourly_flights is a Series indexed by 'Hour'; Altair needs a DataFrame that actually has
# the 'Hour' and 'Count' columns named in the encoding.
hourly_counts = hourly_flights.rename('Count').reset_index()

# The encode(...) call was missing its closing parenthesis.
chart = alt.Chart(hourly_counts).mark_bar().encode(
    x=alt.X('Hour:N'),
    y=alt.Y('Count:Q')
)

# Render the chart using Streamlit (`st` is not available in this notebook)
# st.altair_chart(chart, use_container_width=True)

chart

In [13]:
# Display the arrivals table for inspection.
df_arrivals

Unnamed: 0,Origin City,Origin Code,Arrival,Flight,Airline,Terminal,Status,Date,Reference Day,url,Date_Time,Origin Country,Parent Company
0,Antofagasta,(ANF),00:01,LA343,LATAM Airlines,1,En Route [+],2023-05-02,yesterday,https://www.santiago-airport.com/scl-arrivals?...,2023-05-02 00:01:00,Chile,LATAM Airlines Group
1,Calama,(CJC),00:04,H2245,Sky Airline,1,Landed - On-time [+],2023-05-02,yesterday,https://www.santiago-airport.com/scl-arrivals?...,2023-05-02 00:04:00,Chile,Sky Airline S.A.
2,Puerto Montt,(PMC),00:13,JA156,JetSMART,1,Landed - Delayed [+],2023-05-02,yesterday,https://www.santiago-airport.com/scl-arrivals?...,2023-05-02 00:13:00,Chile,Indigo Partners
3,Sao Paulo,(GRU),00:25,LA751,LATAM Airlines,2,Landed - On-time [+],2023-05-02,yesterday,https://www.santiago-airport.com/scl-arrivals?...,2023-05-02 00:25:00,Brazil,LATAM Airlines Group
4,Sao Paulo,(GRU),00:25,QF3896,Qantas,2,Landed - On-time [+],2023-05-02,yesterday,https://www.santiago-airport.com/scl-arrivals?...,2023-05-02 00:25:00,Brazil,Qantas Airways Limited
...,...,...,...,...,...,...,...,...,...,...,...,...,...
786,Iquique,(IQQ),23:40,LA901,LATAM Airlines,1,Scheduled [+],2023-05-04,tomorrow,https://www.santiago-airport.com/scl-arrivals?...,2023-05-04 23:40:00,Chile,LATAM Airlines Group
787,Iquique,(IQQ),23:40,IB7252,Iberia,1,Scheduled [+],2023-05-04,tomorrow,https://www.santiago-airport.com/scl-arrivals?...,2023-05-04 23:40:00,Chile,International Airlines Group
788,Arica,(ARI),23:50,H2305,Sky Airline,1,Scheduled [+],2023-05-04,tomorrow,https://www.santiago-airport.com/scl-arrivals?...,2023-05-04 23:50:00,Chile,Sky Airline S.A.
789,Calama,(CJC),23:50,LA153,LATAM Airlines,1,Scheduled [+],2023-05-04,tomorrow,https://www.santiago-airport.com/scl-arrivals?...,2023-05-04 23:50:00,Chile,LATAM Airlines Group


In [15]:
# Column dtypes and non-null counts; the gaps in 'Origin Country' and 'Parent Company'
# are rows whose city / airline is missing from the lookup dictionaries.
df_arrivals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 791 entries, 0 to 790
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Origin City     791 non-null    object        
 1   Origin Code     791 non-null    object        
 2   Arrival         791 non-null    object        
 3   Flight          791 non-null    object        
 4   Airline         791 non-null    object        
 5   Terminal        791 non-null    object        
 6   Status          791 non-null    object        
 7   Date            791 non-null    object        
 8   Reference Day   791 non-null    object        
 9   url             791 non-null    object        
 10  Date_Time       791 non-null    datetime64[ns]
 11  Origin Country  787 non-null    object        
 12  Parent Company  788 non-null    object        
dtypes: datetime64[ns](1), object(12)
memory usage: 80.5+ KB
