In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# Load the scraped OLX offers; the path is relative to the notebook's working directory.
olx_table = pd.read_csv('OLX_Offers.csv')

In [None]:
olx_table.columns

In [None]:
olx_table.shape

In [None]:
olx_table.head()

## Table of contents:
1. Cleaning table
 * translate column names to English 

## Data Cleaning:

1.Translate column names to English:

In [None]:
# Polish -> English column names, one pair per line.
# 'Link' maps to itself and is listed only so every source column appears here.
column_translations = {
    'Cena': 'Price',
    'Cena za m²': 'PriceSqM',
    'Czynsz (dodatkowo)': 'MonthlyRent',
    'Finanse': 'Finances',
    'Liczba pokoi': 'Bedrooms',
    'Link': 'Link',
    'Miejsce': 'Place',
    'Oferta od': 'Advert_Owner',
    'Powierzchnia': 'LivingAreaSqM',
    'Poziom': 'Floor',
    'Rodzaj zabudowy': 'BuildingType',
    'Rynek': 'Market',
    'Umeblowane': 'Furnishings',
    'Miasto': 'City',
}
olx_table.rename(columns=column_translations, inplace=True)

In [None]:
olx_table.columns

2.Create a column specifying type of offer: For Sale or For Rent:

In [None]:
# Rows with no MonthlyRent value are labelled as sale offers, all others as rentals.
has_no_rent = olx_table['MonthlyRent'].isna()
olx_table['OfferType'] = np.where(has_no_rent, 'For_Sale', 'For_Rent')

3.Remove unnecessary columns with website link and information about loans 

In [None]:
# `columns=` already identifies the axis, so no separate `axis` argument is needed.
olx_table.drop(columns=['Finances', 'Link'], inplace=True)

In [None]:
olx_table.head(2)

4. Is there a possibility to negotiate the price?

In [None]:
# Flag offers whose raw price text contains the 'Do negocjacji' marker (1 = yes, 0 = no).
# na=False matters: for a missing Price str.contains yields NaN, which np.where treats
# as truthy, so without it such rows would be flagged as negotiable.
# regex=False makes the marker a plain substring match.
olx_table['Negotiating'] = np.where(
    olx_table.Price.str.contains('Do negocjacji', regex=False, na=False), 1, 0
)

5. Replace missing values ('null') in the PriceSqM and MonthlyRent columns with a zero placeholder

In [None]:
# Replace missing PriceSqM values with a zero placeholder written in the same 'zł/m²' text form.
olx_table['PriceSqM'] = olx_table['PriceSqM'].replace(np.nan, '0  zł/m²')

In [None]:
# Replace missing MonthlyRent values with a zero placeholder written in the same 'zł' text form.
# This runs after OfferType was derived from the missing values in this column.
olx_table['MonthlyRent'] = olx_table['MonthlyRent'].replace(np.nan, '0  zł')

In [None]:
olx_table.head(2)

6. Create a function that removes the unnecessary text from a chosen column and keeps just the first value:

In [None]:
def remove_str(x):
    """Strip the currency/unit suffix from a scraped value and keep the leading part.

    The separators ' złDo negocjacji', ' zł', ' zł/m²' and ' m²' are tried in that
    order. The first one that splits ``x`` into exactly two pieces wins, and the
    piece in front of it is returned.

    Parameters
    ----------
    x : str, None, NaN or any other scalar
        Raw cell value.

    Returns
    -------
    str, None or NaN
        The text before the matched separator, or ``x`` itself when no separator
        splits it in two. ``None``/NaN are returned unchanged so the cell stays
        missing; any other non-string value is returned as ``str(x)``.
    """
    # Missing cells have no .split(); pass them through instead of raising.
    if x is None or (isinstance(x, float) and x != x):
        return x
    if not isinstance(x, str):
        return str(x)

    # Order matters: the negotiable-price suffix must be tried before the plain ' zł'.
    for separator in (' złDo negocjacji', ' zł', ' zł/m²', ' m²'):
        pieces = x.split(separator)
        if len(pieces) == 2:
            return pieces[0]
    return x

In [None]:
# Keep only the part of each Price value that precedes the suffix matched by remove_str.
olx_table['Price'] = olx_table['Price'].apply(remove_str)

In [None]:
# Keep only the part of each PriceSqM value that precedes the suffix matched by remove_str.
olx_table['PriceSqM'] = olx_table['PriceSqM'].apply(remove_str)

In [None]:
# Keep only the part of each LivingAreaSqM value that precedes the suffix matched by remove_str.
olx_table['LivingAreaSqM'] = olx_table['LivingAreaSqM'].apply(remove_str)

In [None]:
# Keep only the part of each MonthlyRent value that precedes the suffix matched by remove_str.
olx_table['MonthlyRent'] = olx_table['MonthlyRent'].apply(remove_str)

In [None]:
olx_table.head(2)

7. Remove spaces from the first two columns (Price and PriceSqM)

In [None]:
olx_table.iloc[0,0]

In [None]:
# Remove every space from the Price and PriceSqM strings so they can be parsed as numbers later.
olx_table['Price'] = olx_table['Price'].str.replace(' ', '')
olx_table['PriceSqM'] = olx_table['PriceSqM'].str.replace(' ', '') 
# Show the first row / first column cell again to check the result.
olx_table.iloc[0,0]

In [None]:
olx_table.head(2)

8.Change values in Bedrooms column

In [None]:
olx_table.Bedrooms.unique()

In [None]:
olx_table.groupby('Bedrooms')['Bedrooms'].count()

In [None]:
def rooms_number(x):
    """Map a room-count label to an integer between 1 and 4.

    'Kawalerka' and '1 pokój' give 1, '2 pokoje' gives 2, '3 pokoje' gives 3.
    Every other value, including missing or unrecognised ones, gives 4.
    """
    label_groups = (
        (('Kawalerka', '1 pokój'), 1),
        (('2 pokoje',), 2),
        (('3 pokoje',), 3),
    )
    for labels, room_count in label_groups:
        if x in labels:
            return room_count
    # Catch-all bucket for anything not listed above.
    return 4

In [None]:
# Replace the text labels in Bedrooms with the integer codes returned by rooms_number.
olx_table['Bedrooms'] = olx_table['Bedrooms'].apply(rooms_number)

In [None]:
olx_table.groupby('Bedrooms')['Bedrooms'].count()

9.Change data types - 'object' to 'float' - for columns with numbers.

In [None]:
olx_table.shape

In [None]:
olx_table.dtypes

In [None]:
def is_float_a(x):
    """Convert ``x`` to ``float``, or return ``False`` when it cannot be converted.

    Parameters
    ----------
    x : any
        Value to convert, typically a numeric string.

    Returns
    -------
    float or bool
        ``float(x)`` on success; ``False`` when ``float`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(x)
    # Only conversion failures are swallowed; a bare `except:` would also hide
    # KeyboardInterrupt / SystemExit and make the cell impossible to interrupt.
    except (TypeError, ValueError, OverflowError):
        return False

In [None]:
def is_float_b(x):
    """Return ``x`` converted with ``float``; conversion errors propagate to the caller."""
    as_float = float(x)
    return as_float

In [None]:
# Convert Price to float. Values float() cannot parse come back from is_float_a as False,
# which is_float_b then turns into 0.0.
olx_table['Price'] = olx_table['Price'].apply(is_float_a).apply(is_float_b)

In [None]:
# Convert PriceSqM to float. Values float() cannot parse come back from is_float_a as False,
# which is_float_b then turns into 0.0.
olx_table['PriceSqM'] = olx_table['PriceSqM'].apply(is_float_a).apply(is_float_b)

In [None]:
# Spaces were stripped only from Price and PriceSqM earlier, so remove them here as well.
# float('1 200') raises, and is_float_a would silently turn such a rent into 0.0.
# astype(str) keeps the cell re-runnable once the column already holds floats.
olx_table['MonthlyRent'] = (
    olx_table['MonthlyRent']
    .astype(str)
    .str.replace(' ', '')
    .apply(is_float_a)
    .apply(is_float_b)
)

In [None]:
# Spaces were stripped only from Price and PriceSqM earlier, so remove them here as well.
# A value such as '1 050' makes float() raise, and is_float_a would silently turn it into 0.0.
# astype(str) keeps the cell re-runnable once the column already holds floats.
olx_table['LivingAreaSqM'] = (
    olx_table['LivingAreaSqM']
    .astype(str)
    .str.replace(' ', '')
    .apply(is_float_a)
    .apply(is_float_b)
)

In [None]:
olx_table.dtypes

In [None]:
olx_table.shape

10. Change values in the Floor column

In [None]:
olx_table.Floor.unique() # we have some object in here

In [None]:
olx_table.groupby('Floor')['Floor'].count()

I am not interested in attic flats (Poddasze) or flats above the 10th floor (Powyżej 10), so I remove those records together with records that have no floor given. The ground floor (Parter) is renamed to 0.

In [None]:
# Remove the rows whose Floor is 'Poddasze' or 'Powyżej 10'.
unwanted_floor = olx_table.Floor.isin(['Poddasze', 'Powyżej 10'])
olx_table.drop(olx_table[unwanted_floor].index, axis=0, inplace=True)

In [None]:
# Remove the rows that have no Floor value at all.
olx_table.dropna(subset=['Floor'],inplace=True)

In [None]:
# Rename the ground-floor label 'Parter' to '0'; the replacement is a string, not a number.
olx_table.Floor = olx_table.Floor.replace('Parter','0')

In [None]:
olx_table.groupby('Floor')['Floor'].count()

11. Divide the olx table into 2 separate tables:

- For Rent
- For Sale

In [None]:
# Split the cleaned offers by OfferType; .copy() makes each subset an independent frame.
is_rent = olx_table.OfferType == 'For_Rent'
is_sale = olx_table.OfferType == 'For_Sale'
df_rent = olx_table.loc[is_rent].copy()
df_sale = olx_table.loc[is_sale].copy()

"For Sale" Table

In [None]:
df_sale.head()

In [None]:
# The sale table needs neither the rental column nor the offer-type column.
df_sale.drop(columns=['MonthlyRent', 'OfferType'], inplace=True)

In [None]:
df_sale.describe()

In [None]:
len(df_sale)

Remove records without a price, e.g. offers to exchange a flat

In [None]:
# Collect the index labels of the zero-price rows first, then drop exactly those labels.
zero_price_rows = df_sale.index[df_sale.Price == 0]
df_sale = df_sale.drop(index=zero_price_rows)

In [None]:
df_sale.describe()

In [None]:
df_sale.isnull().sum()

In [None]:
df_sale.head()

In [None]:
len(df_sale)

In [None]:
df_sale.groupby('City')['City'].count().sort_values()

"For Rent" Table

In [None]:
df_rent.head()

In [None]:
# Remove PriceSqM, Market and OfferType from the rental table.
df_rent.drop(columns=['PriceSqM', 'Market', 'OfferType'], inplace=True)

In [None]:
df_rent.describe()

In [None]:
df_rent.isnull().sum()

In [None]:
df_rent.head()

In [None]:
len(df_rent)

In [None]:
df_rent.groupby('City')['City'].count().sort_values()

# Charts

In [None]:
def plot_bar_x(label1, label2, title='Amount of offers', xlabel='', ylabel='Number of offers'):
    """Draw a bar chart with one bar per category on the current pyplot figure and show it.

    Parameters
    ----------
    label1 : sequence
        Category names, used as the x-axis tick labels (one per bar).
    label2 : sequence
        Bar heights, in the same order as ``label1``.
    title : str, optional
        Chart title. Defaults to 'Amount of offers'.
    xlabel : str, optional
        X-axis label. Defaults to an empty string.
    ylabel : str, optional
        Y-axis label. Defaults to 'Number of offers'.
    """
    # Bars sit at integer positions 0..n-1; the category names are attached as tick labels.
    index = np.arange(len(label1))
    plt.bar(index, label2)
    plt.xlabel(xlabel, fontsize=10)
    plt.ylabel(ylabel, fontsize=10)
    plt.xticks(index, label1, fontsize=10, rotation=30)
    plt.title(title)
    plt.show()

In [None]:
# Count offers per advertiser type.
# The original cell read from `olx_r` and grouped by 'Owner'; neither exists in this
# notebook, so it failed on a fresh kernel.
# NOTE(review): `df_rent` is assumed to be the table that `olx_r` referred to - confirm.
# The grouped column is renamed to 'Owner' so the cells that read `a` keep working.
a = (
    df_rent.groupby('Advert_Owner')
    .agg({'Price': 'count'})
    .reset_index()
    .rename(columns={'Advert_Owner': 'Owner'})
)
a

In [None]:
# Bar labels (owner categories) and bar heights (offer counts), read straight from the
# aggregated columns instead of being appended row by row.
owners_label = a['Owner'].tolist()
amount_label = a['Price'].tolist()

print(owners_label, amount_label)

In [None]:
# Bar chart of the number of offers for each owner category.
plot_bar_x(owners_label, amount_label)

What types of buildings do we have?

In [None]:
# Count offers per building type for Wodzisław Śląski.
# The original cell read from `olx_r` and grouped by 'Buildings_Type'; neither exists in
# this notebook (the column is 'BuildingType'), so it failed on a fresh kernel.
# NOTE(review): `df_rent` is assumed to be the table that `olx_r` referred to - confirm.
# The city is matched case-insensitively because the notebook spells it both as
# 'wodzislaw-slaski' and 'Wodzislaw-Slaski'.
in_wodzislaw = df_rent.City.str.lower() == 'wodzislaw-slaski'
building_types = (
    df_rent[in_wodzislaw]
    .groupby('BuildingType')['Price']
    .count()
    .reset_index()
)

# Bar labels and heights come straight from the aggregated columns.
building_types_label = building_types['BuildingType'].tolist()
building_types_amount = building_types['Price'].tolist()
print(building_types_label, building_types_amount)

In [None]:
# Bar chart of the number of offers for each building type.
plot_bar_x(building_types_label, building_types_amount)

# Charts... in progress

In [None]:
# Match the city case-insensitively: elsewhere in this notebook the same city is written
# as 'wodzislaw-slaski', so an exact match on one spelling could come back empty.
wodzislaw = olx_table[olx_table.City.str.lower() == 'wodzislaw-slaski']

In [None]:
# Match the city case-insensitively: city names are not spelled with consistent casing
# across this notebook, so an exact match on 'Rybnik' could come back empty.
rybnik = olx_table[olx_table.City.str.lower() == 'rybnik']

In [None]:
plt.style.use('fivethirtyeight')

# The column created during cleaning is 'OfferType' and its values are 'For_Sale' and
# 'For_Rent'; the previous 'Offer_Type' / 'for_sale' / 'for_rent' spellings do not exist.
values_a = [
    len(rybnik[rybnik.OfferType == 'For_Sale']),
    len(rybnik[rybnik.OfferType == 'For_Rent']),
]
labels_a = ['For Sale', 'For Rent']
colors_a = ['#174F5F', '#3CAEA3']
explode_a = [0, 0.05]  # pull the 'For Rent' wedge slightly out of the pie

values_b = [
    len(wodzislaw[wodzislaw.OfferType == 'For_Sale']),
    len(wodzislaw[wodzislaw.OfferType == 'For_Rent']),
]
labels_b = ['For Sale', 'For Rent']
colors_b = ['#174F5F', '#3CAEA3']
explode_b = [0, 0.05]

# create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2)

# plot each pie chart in a separate subplot
ax1.pie(values_a, labels=labels_a, colors=colors_a, explode=explode_a, startangle=0, autopct='%1.1f%%', wedgeprops={'edgecolor': 'black'})
ax2.pie(values_b, labels=labels_b, colors=colors_b, explode=explode_b, startangle=0, autopct='%1.1f%%', wedgeprops={'edgecolor': 'black'})

ax1.set_title('Rybnik')
ax2.set_title('Wodzislaw')
fig.suptitle('Offer Type Ratio')

plt.tight_layout()
plt.show()