# Exploratory data analysis (EDA) of the Inquiry and Prices dataset


## Import necessary libraries

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from scipy import stats


In [None]:
%matplotlib inline

In [None]:
# Read the merged inquiries-and-prices table from the data folder
merged_csv_path = '../data/merged_inquiries_and_prices.csv'
df_merged_IP = pd.read_csv(merged_csv_path)

In [None]:
# Preview the first rows of the loaded frame as a quick sanity check
df_merged_IP.head()

## Correcting date and time formatting.

In [None]:
# Convert every date/time column to pandas datetimes. With errors='coerce',
# values that cannot be parsed become NaT instead of raising.
datetime_columns = [
    'date_from',
    'date_to',
    'date',
    'time_x',
    'arrival_date_x',
    'departure_date_x',
]
for column_name in datetime_columns:
    df_merged_IP[column_name] = pd.to_datetime(df_merged_IP[column_name], errors='coerce')

In [None]:
# Notebook-wide plotting defaults: seaborn grid style first, then the
# matplotlib font size and default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({
    'font.size': 14,
    'figure.figsize': (11, 7),
})

## Deleting redundant column

In [None]:
# Drop the redundant 'Unnamed: 0' column (presumably an index written out when
# the CSV was saved; confirm against the export step).
# errors='ignore' keeps this cell idempotent: `del` raised KeyError when the
# cell was re-run or when the column was absent from the file.
df_merged_IP = df_merged_IP.drop(columns=['Unnamed: 0'], errors='ignore')

## Descriptive statistics

In [None]:
# Summary statistics for the frame's columns
df_merged_IP.describe()

In [None]:
# Column dtypes and non-null counts (useful to confirm the datetime conversion above)
df_merged_IP.info()

## Graphing interactions between variables

In [None]:
# Compute pairwise column correlations and draw them as a seaborn heatmap.
# Only numeric/boolean columns are correlated: the frame also holds datetime
# columns (and presumably the non-numeric `price_catagory` label), and
# `DataFrame.corr()` no longer drops such columns silently on pandas >= 2.0,
# where calling it on the full frame raises.
corr = df_merged_IP.select_dtypes(include=['number', 'bool']).corr()
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,  # fix the colour scale to the full correlation range
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)
# Rotate the x tick labels so long column names do not overlap;
# the trailing semicolon suppresses the list-of-Text repr in the cell output.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right');

In [None]:
# Show the correlation matrix as a table.
# Restricted to numeric/boolean columns because `DataFrame.corr()` raises on
# the frame's datetime (and any string) columns under pandas >= 2.0.
df_merged_IP.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Scatter of inquiry price against the filled-in daily price; points are
# coloured by the daily price using the 'rainbow' colormap.
inquiry_price = df_merged_IP['inquiry_price_x']
daily_price = df_merged_IP['filled_in_price_per_day']
plt.scatter(x=inquiry_price, y=daily_price, c=daily_price, cmap='rainbow');

In [None]:
# List the distinct values of the price category column
df_merged_IP['price_catagory'].unique()

In [None]:
# Split the merged frame into features and target.
# The earlier version of this cell was adapted from a scikit-learn dataset
# example and read `.data`, `.target`, `.target_names` and `.feature_names`;
# a pandas DataFrame has none of those attributes, so the cell raised
# AttributeError. The split is now done directly on the DataFrame columns.
TARGET_COLUMN = 'price_catagory'
data = df_merged_IP

# Define features (every column except the target) and target
df_X = data.drop(columns=[TARGET_COLUMN])
df_y = data[[TARGET_COLUMN]]
X = df_X
y = df_y[TARGET_COLUMN]

# Distinct category labels, in order of first appearance (missing values excluded).
# NOTE(review): the column is used as stored; no integer-to-label decoding is
# applied. Confirm it already holds the category labels.
target_names = y.dropna().unique()

# Features and target side by side in one frame
df_merge1 = df_X.join(df_y)

In [None]:
# Pairwise plots of the frame's variables, coloured by price category
sns.pairplot(data=df_merged_IP, hue='price_catagory', height=3);

## Modelling 

In [None]:
# Ordinary least squares: regress the inquiry price on the filled-in
# per-day and per-week prices, then display the fitted model's summary.
price_formula = 'inquiry_price_x ~ filled_in_price_per_day + filled_in_price_per_week'
ols_model = smf.ols(formula=price_formula, data=df_merged_IP)
ols_fit = ols_model.fit()
ols_fit.summary()