In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
def descriptive_stats(df):
    # Show shape of DF
    print('Shape: ', df.shape)
    # Show first and last rows of DF
    display(df.iloc[[0,-1]])
    # Show schema information
    print('Schema:')
    display(df.info())
    # Check if any null values are present
    print('Null Values:') 
    display(df.isnull().sum())
    # Check if any duplicates are present
    print('Duplicate Values:')
    display(df.duplicated().sum())
    # Describe the stats
    display(df.describe())
    
# Commenting out these checks because skew() and outliers() cannot be performed on the values without cleaning them first
    # Check and display skewness for each columns
#     print("Skewness:")
#     for column in df.columns:
#         skewness = df[column].skew()
#         if skewness > 1: print(f'{columns} is positively skewed with skewness: {skewness}')
#         if skewness < -1: print(f'{columns} is negatively skewed with skewness: {skewness}')
            
    # Check and display outliers
    print("Outliers:")
#     q1 = df.quantile(0.25)
#     q3 = df.quantile(0.75)
#     iqr = q3 - q1
#     outliers = (df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))
#     print(outliers)

In [3]:
# Source: https://data.gov.ie/dataset/hsq06-average-price-of-houses/resource/eec93d2b-a0bf-4b59-bbbb-3839c95ac7e1
housing_prices_df = pd.read_csv("https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/HSQ06/CSV/1.0/en")

descriptive_stats(housing_prices_df)

Shape:  (2352, 8)


Unnamed: 0,STATISTIC,Statistic Label,TLIST(Q1),Quarter,C02343V02817,Area,UNIT,VALUE
0,HSQ06C1,New House Prices,19751,1975Q1,-,National,Euro,11887.0
2351,HSQ06C2,Second Hand House Prices,20164,2016Q4,06,Other areas,Euro,207253.0


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2352 entries, 0 to 2351
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   STATISTIC        2352 non-null   object 
 1   Statistic Label  2352 non-null   object 
 2   TLIST(Q1)        2352 non-null   int64  
 3   Quarter          2352 non-null   object 
 4   C02343V02817     2352 non-null   object 
 5   Area             2352 non-null   object 
 6   UNIT             2352 non-null   object 
 7   VALUE            2268 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 147.1+ KB


None

Null Values:


STATISTIC           0
Statistic Label     0
TLIST(Q1)           0
Quarter             0
C02343V02817        0
Area                0
UNIT                0
VALUE              84
dtype: int64

Duplicate Values:


0

Unnamed: 0,TLIST(Q1),VALUE
count,2352.0,2268.0
mean,19957.5,137144.674162
std,121.240118,105517.303571
min,19751.0,10667.0
25%,19852.75,46902.0
50%,19957.5,87355.0
75%,20062.25,222014.5
max,20164.0,549330.0


Outliers:


In [4]:
# Source: https://data.gov.ie/dataset/overall-loan-approvals-by-year?package_type=dataset
loan_approvals_df = pd.read_csv("https://opendata.housing.gov.ie/dataset/f8f865c4-008f-4ef5-9902-09ad73d2acb0/resource/23c06cd8-8d58-4b2f-82b3-95f014f35b57/download/overall_loan_approvals_by_year.csv", header=[0,1], skiprows=1, encoding='cp1252')
descriptive_stats(loan_approvals_df)

Shape:  (47, 7)


Unnamed: 0_level_0,Year,New Houses,Unnamed: 2_level_0,Other Houses,Unnamed: 4_level_0,Totals,Unnamed: 6_level_0
Unnamed: 0_level_1,Unnamed: 0_level_1,Number,Value €m,Number,Value €m,Number,Value €m
0,1970.0,7884.0,33.5,4587.0,19.9,12471.0,53.5
46,,,,,,,


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 7 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   (Year, Unnamed: 0_level_1)       46 non-null     float64
 1   (New Houses, Number)             46 non-null     object 
 2   (Unnamed: 2_level_0, Value  €m)  46 non-null     object 
 3   (Other Houses, Number)           46 non-null     object 
 4   (Unnamed: 4_level_0, Value  €m)  46 non-null     object 
 5   (Totals, Number)                 46 non-null     object 
 6   (Unnamed: 6_level_0, Value  €m)  46 non-null     object 
dtypes: float64(1), object(6)
memory usage: 2.7+ KB


None

Null Values:


Year                Unnamed: 0_level_1    1
New Houses          Number                1
Unnamed: 2_level_0  Value  €m             1
Other Houses        Number                1
Unnamed: 4_level_0  Value  €m             1
Totals              Number                1
Unnamed: 6_level_0  Value  €m             1
dtype: int64

Duplicate Values:


0

Unnamed: 0_level_0,Year
Unnamed: 0_level_1,Unnamed: 0_level_1
count,46.0
mean,1992.5
std,13.422618
min,1970.0
25%,1981.25
50%,1992.5
75%,2003.75
max,2015.0


Outliers:


In [5]:
# Source: https://data.gov.ie/dataset/supply-of-housing-land-2000-to-2012?package_type=dataset
supply_housing_df = pd.read_csv("https://opendata.housing.gov.ie/dataset/d709918c-bd2d-4b39-aeff-0ab3f5f6603e/resource/9166d0e4-51b1-406d-a496-0f9fb8586230/download/supply_of_housing_land_2000_to_2012.csv", header=[0,1], skiprows=1)
descriptive_stats(supply_housing_df)

Shape:  (36, 27)


Unnamed: 0_level_0,Unnamed: 0_level_0,30-Jun-00,Unnamed: 2_level_0,30-Jun-01,Unnamed: 4_level_0,30-Jun-02,Unnamed: 6_level_0,30-Jun-03,Unnamed: 8_level_0,30-Jun-04,...,30-Jun-08,Unnamed: 18_level_0,30-Jun-09,Unnamed: 20_level_0,30-Jun-10,Unnamed: 22_level_0,30-Jun-11,Unnamed: 24_level_0,30-Jun-12,Unnamed: 26_level_0
Unnamed: 0_level_1,Local Authority,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,...,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units
0,Carlow,132,3060,222,4339,201,4007,180,3629,227,...,197,3210,187,3020,189,3024,189,3024,378,9351
35,TOTAL,10775,263346,8816,247290,12177,327784,12819,368705,12540,...,14191,462709,17112,485246,19821,502736,17223,461806,13707,394357


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 27 columns):
 #   Column                                       Non-Null Count  Dtype 
---  ------                                       --------------  ----- 
 0   (Unnamed: 0_level_0, Local Authority)        36 non-null     object
 1   (30-Jun-00, Hectares)                        34 non-null     object
 2   (Unnamed: 2_level_0, No. of Housing Units)   34 non-null     object
 3   (30-Jun-01, Hectares)                        35 non-null     object
 4   (Unnamed: 4_level_0, No. of Housing Units)   35 non-null     object
 5   (30-Jun-02, Hectares)                        35 non-null     object
 6   (Unnamed: 6_level_0, No. of Housing Units)   35 non-null     object
 7   (30-Jun-03, Hectares)                        35 non-null     object
 8   (Unnamed: 8_level_0, No. of Housing Units)   35 non-null     object
 9   (30-Jun-04, Hectares)                        35 non-null     object
 10  (Unnamed

None

Null Values:


Unnamed: 0_level_0   Local Authority         0
30-Jun-00            Hectares                2
Unnamed: 2_level_0   No. of Housing Units    2
30-Jun-01            Hectares                1
Unnamed: 4_level_0   No. of Housing Units    1
30-Jun-02            Hectares                1
Unnamed: 6_level_0   No. of Housing Units    1
30-Jun-03            Hectares                1
Unnamed: 8_level_0   No. of Housing Units    1
30-Jun-04            Hectares                1
Unnamed: 10_level_0  No. of Housing Units    1
30-Jun-05            Hectares                1
Unnamed: 12_level_0  No. of Housing Units    1
30-Jun-06            Hectares                1
Unnamed: 14_level_0  No. of Housing Units    1
30-Jun-07            Hectares                1
Unnamed: 16_level_0  No. of Housing Units    1
30-Jun-08            Hectares                1
Unnamed: 18_level_0  No. of Housing Units    1
30-Jun-09            Hectares                1
Unnamed: 20_level_0  No. of Housing Units    1
30-Jun-10    

Duplicate Values:


0

Unnamed: 0_level_0,Unnamed: 0_level_0,30-Jun-00,Unnamed: 2_level_0,30-Jun-01,Unnamed: 4_level_0,30-Jun-02,Unnamed: 6_level_0,30-Jun-03,Unnamed: 8_level_0,30-Jun-04,...,30-Jun-08,Unnamed: 18_level_0,30-Jun-09,Unnamed: 20_level_0,30-Jun-10,Unnamed: 22_level_0,30-Jun-11,Unnamed: 24_level_0,30-Jun-12,Unnamed: 26_level_0
Unnamed: 0_level_1,Local Authority,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,...,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units,Hectares,No. of Housing Units
count,36,34,34,35,35,35,35,35,35,35,...,35,35,35,35,35,35,35,35,35,35
unique,36,33,34,33,35,35,35,34,35,32,...,34,35,35,35,35,35,35,35,35,35
top,Carlow,136,3060,159,4339,201,4007,223,3629,227,...,112,3210,187,3020,189,3024,189,3024,378,9351
freq,1,2,1,2,1,1,1,2,1,2,...,2,1,1,1,1,1,1,1,1,1


Outliers:


In [6]:
# Source: https://data.gov.ie/dataset/national-house-construction-cost-index?package_type=dataset
national_house_cost_index_df = pd.read_csv("https://opendata.housing.gov.ie/dataset/aefa3fae-0ca1-4694-867a-a8a70ea77397/resource/188485c4-e5d7-4406-996b-1ff12a9a045a/download/national_house_construction_cost_index_0.csv", skiprows=1, header=[1])
descriptive_stats(national_house_cost_index_df)

Shape:  (14, 24)


Unnamed: 0,Month,1994,1995,1996,1997,1998,1999,2000,2001,2002,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,January,109.2,113.5,115.9,118,122.9,126.3,135.8,154.3,169.9,...,198.8,207.4,208,206.9,210.2,202.5,204.1,204.9,206.2,207.4
13,% Increase on previous year,3.40%,3.30%,1.40%,3.50%,3.70%,4.90%,7.60%,14.50%,6.40%,...,3.90%,3.80%,-1.40%,1.10%,-2.70%,0.20%,0.60%,0.60%,0.50%,


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 24 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Month   14 non-null     object 
 1   1994    14 non-null     object 
 2   1995    14 non-null     object 
 3   1996    14 non-null     object 
 4   1997    14 non-null     object 
 5   1998    14 non-null     object 
 6   1999    14 non-null     object 
 7   2000    14 non-null     object 
 8   2001    14 non-null     object 
 9   2002    14 non-null     object 
 10  2003    14 non-null     object 
 11  2004    14 non-null     object 
 12  2005    14 non-null     object 
 13  2006    14 non-null     object 
 14  2007    14 non-null     object 
 15  2008    14 non-null     object 
 16  2009    14 non-null     object 
 17  2010    14 non-null     object 
 18  2011    14 non-null     object 
 19  2012    14 non-null     object 
 20  2013    14 non-null     object 
 21  2014    14 non-null     object 
 

None

Null Values:


Month     0
1994      0
1995      0
1996      0
1997      0
1998      0
1999      0
2000      0
2001      0
2002      0
2003      0
2004      0
2005      0
2006      0
2007      0
2008      0
2009      0
2010      0
2011      0
2012      0
2013      0
2014      0
2015      0
2016     12
dtype: int64

Duplicate Values:


0

Unnamed: 0,2016
count,2.0
mean,207.25
std,0.212132
min,207.1
25%,207.175
50%,207.25
75%,207.325
max,207.4


Outliers:


In [7]:
# Source: https://data.gov.ie/dataset/national-house-construction-cost-index?package_type=dataset
national_house_cost_index_df = pd.read_csv("https://opendata.housing.gov.ie/dataset/aefa3fae-0ca1-4694-867a-a8a70ea77397/resource/188485c4-e5d7-4406-996b-1ff12a9a045a/download/national_house_construction_cost_index_0.csv", skiprows=1, header=[1])
descriptive_stats(national_house_cost_index_df)

Shape:  (14, 24)


Unnamed: 0,Month,1994,1995,1996,1997,1998,1999,2000,2001,2002,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,January,109.2,113.5,115.9,118,122.9,126.3,135.8,154.3,169.9,...,198.8,207.4,208,206.9,210.2,202.5,204.1,204.9,206.2,207.4
13,% Increase on previous year,3.40%,3.30%,1.40%,3.50%,3.70%,4.90%,7.60%,14.50%,6.40%,...,3.90%,3.80%,-1.40%,1.10%,-2.70%,0.20%,0.60%,0.60%,0.50%,


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 24 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Month   14 non-null     object 
 1   1994    14 non-null     object 
 2   1995    14 non-null     object 
 3   1996    14 non-null     object 
 4   1997    14 non-null     object 
 5   1998    14 non-null     object 
 6   1999    14 non-null     object 
 7   2000    14 non-null     object 
 8   2001    14 non-null     object 
 9   2002    14 non-null     object 
 10  2003    14 non-null     object 
 11  2004    14 non-null     object 
 12  2005    14 non-null     object 
 13  2006    14 non-null     object 
 14  2007    14 non-null     object 
 15  2008    14 non-null     object 
 16  2009    14 non-null     object 
 17  2010    14 non-null     object 
 18  2011    14 non-null     object 
 19  2012    14 non-null     object 
 20  2013    14 non-null     object 
 21  2014    14 non-null     object 
 

None

Null Values:


Month     0
1994      0
1995      0
1996      0
1997      0
1998      0
1999      0
2000      0
2001      0
2002      0
2003      0
2004      0
2005      0
2006      0
2007      0
2008      0
2009      0
2010      0
2011      0
2012      0
2013      0
2014      0
2015      0
2016     12
dtype: int64

Duplicate Values:


0

Unnamed: 0,2016
count,2.0
mean,207.25
std,0.212132
min,207.1
25%,207.175
50%,207.25
75%,207.325
max,207.4


Outliers:


In [8]:
# Source: https://data.gov.ie/dataset/annual-market-information-indices?package_type=dataset
annual_market_information_indices = pd.read_csv('https://opendata.housing.gov.ie/dataset/15c05066-0d0c-4cd0-9d6c-480c356b5fe6/resource/31248a26-f2e5-4c3d-ad42-2bdcac293bb2/download/market_information_indices_annual_0.csv', skiprows=3, header=[0])
descriptive_stats(annual_market_information_indices)

Shape:  (26, 6)


Unnamed: 0,Year and Quarter,Private new house prices,mortgage interest rates,Average earnings of adult workers,House Construction Cost Index,Consumer Prices
0,1990,98,99,99.0,98,98
25,2015,420,27,,205,169


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 6 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Year and Quarter                   26 non-null     int64  
 1   Private new house prices           26 non-null     int64  
 2   mortgage interest rates            26 non-null     int64  
 3   Average earnings of adult workers  17 non-null     float64
 4   House Construction Cost Index      26 non-null     int64  
 5   Consumer Prices                    26 non-null     int64  
dtypes: float64(1), int64(5)
memory usage: 1.3 KB


None

Null Values:


Year and Quarter                     0
Private new house prices             0
mortgage interest rates              0
Average earnings of adult workers    9
House Construction Cost Index        0
Consumer Prices                      0
dtype: int64

Duplicate Values:


0

Unnamed: 0,Year and Quarter,Private new house prices,mortgage interest rates,Average earnings of adult workers,House Construction Cost Index,Consumer Prices
count,26.0,26.0,26.0,17.0,26.0,26.0
mean,2002.5,275.038462,49.153846,137.882353,160.269231,137.923077
std,7.648529,128.320063,22.522331,29.989336,41.860299,25.794454
min,1990.0,98.0,27.0,99.0,98.0,98.0
25%,1996.25,135.75,32.0,114.0,117.0,113.5
50%,2002.5,312.5,42.5,129.0,172.5,141.0
75%,2008.75,364.5,59.75,163.0,202.0,163.75
max,2015.0,480.0,100.0,189.0,208.0,170.0


Outliers:


In [9]:
# Source: https://data.gov.ie/dataset/house-registrations-by-area?package_type=dataset
house_registration_df = pd.read_csv("https://opendata.housing.gov.ie/dataset/8d049ce8-254f-4636-b97a-37afd79bb4c4/resource/1d898b5e-7974-444f-b43c-5eb5162718ae/download/house_registrations_by_area.csv", header=[0,1], skiprows=1)
descriptive_stats(house_registration_df)

Shape:  (28, 182)


Unnamed: 0_level_0,Year:,1978,1979,1980,1981,1982,1983,1984,1985,1986,...,Unnamed: 172_level_0,Unnamed: 173_level_0,Unnamed: 174_level_0,Unnamed: 175_level_0,Unnamed: 176_level_0,Unnamed: 177_level_0,Unnamed: 178_level_0,Unnamed: 179_level_0,Unnamed: 180_level_0,Unnamed: 181_level_0
Unnamed: 0_level_1,County,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,\n TOTAL,...,May,June,July,August,September,2016 to date,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1
0,Carlow,1.0,5.0,44.0,97.0,61.0,99.0,64.0,69.0,41.0,...,2.0,0.0,2.0,0.0,2.0,9.0,,,,
27,,,,,,,,,,,...,,,,,,,,,,


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Columns: 182 entries, ('Year:', 'County') to ('Unnamed: 181_level_0', 'Unnamed: 181_level_1')
dtypes: float64(105), object(77)
memory usage: 39.9+ KB


None

Null Values:


Year:                 County                   1
1978                  \n TOTAL                 1
1979                  \n TOTAL                 1
1980                  \n TOTAL                 1
1981                  \n TOTAL                 1
                                              ..
Unnamed: 177_level_0  2016 to date             1
Unnamed: 178_level_0  Unnamed: 178_level_1    28
Unnamed: 179_level_0  Unnamed: 179_level_1    28
Unnamed: 180_level_0  Unnamed: 180_level_1    28
Unnamed: 181_level_0  Unnamed: 181_level_1    28
Length: 182, dtype: int64

Duplicate Values:


0

Unnamed: 0_level_0,Unnamed: 69_level_0,Unnamed: 70_level_0,Unnamed: 72_level_0,Unnamed: 73_level_0,Unnamed: 74_level_0,2009,Unnamed: 77_level_0,Unnamed: 78_level_0,Unnamed: 79_level_0,Unnamed: 80_level_0,...,Unnamed: 171_level_0,Unnamed: 172_level_0,Unnamed: 173_level_0,Unnamed: 174_level_0,Unnamed: 175_level_0,Unnamed: 176_level_0,Unnamed: 178_level_0,Unnamed: 179_level_0,Unnamed: 180_level_0,Unnamed: 181_level_0
Unnamed: 0_level_1,Jul,Aug,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,...,Apr,May,June,July,August,September,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,...,27.0,27.0,27.0,27.0,27.0,27.0,0.0,0.0,0.0,0.0
mean,40.222222,45.62963,39.703704,35.407407,44.666667,17.851852,34.666667,33.851852,26.518519,15.703704,...,31.185185,42.444444,22.0,41.925926,40.666667,24.888889,,,,
std,106.94439,122.943435,106.639152,92.578557,123.62412,46.390237,102.986183,87.929038,70.381035,41.633765,...,88.779345,117.97892,58.660693,119.358518,119.04944,70.614519,,,,
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
25%,5.5,3.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.5,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,
50%,9.0,9.0,8.0,10.0,4.0,4.0,3.0,10.0,4.0,3.0,...,1.0,2.0,1.0,1.0,1.0,0.0,,,,
75%,24.0,15.5,12.5,21.0,18.5,14.0,9.0,31.0,16.5,5.5,...,8.0,20.5,15.0,10.5,15.0,7.5,,,,
max,543.0,616.0,536.0,478.0,603.0,241.0,468.0,457.0,358.0,212.0,...,421.0,573.0,297.0,566.0,549.0,336.0,,,,


Outliers:


In [10]:
# Source: https://data.gov.ie/dataset/lrm03-seasonally-adjusted-standardised-unemployment-rate-/resource/2d63d7f4-7e3e-42ab-856f-f0817f69fb17
unemployment_rates = pd.read_csv('https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/LRM03/CSV/1.0/en', header=[0])
descriptive_stats(unemployment_rates)

Shape:  (388, 8)


Unnamed: 0,STATISTIC,STATISTIC Label,TLIST(M1),Month,C02196V02652,State,UNIT,VALUE
0,LRM03,Seasonally Adjusted Standardised Unemployment ...,198301,1983M01,-,State,%,13.0
387,LRM03,Seasonally Adjusted Standardised Unemployment ...,201504,2015M04,-,State,%,9.8


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   STATISTIC        388 non-null    object 
 1   STATISTIC Label  388 non-null    object 
 2   TLIST(M1)        388 non-null    int64  
 3   Month            388 non-null    object 
 4   C02196V02652     388 non-null    object 
 5   State            388 non-null    object 
 6   UNIT             388 non-null    object 
 7   VALUE            388 non-null    float64
dtypes: float64(1), int64(1), object(6)
memory usage: 24.4+ KB


None

Null Values:


STATISTIC          0
STATISTIC Label    0
TLIST(M1)          0
Month              0
C02196V02652       0
State              0
UNIT               0
VALUE              0
dtype: int64

Duplicate Values:


0

Unnamed: 0,TLIST(M1),VALUE
count,388.0,388.0
mean,199873.469072,10.970103
std,934.675282,4.654628
min,198301.0,3.7
25%,199101.75,4.975
50%,199902.5,12.6
75%,200703.25,14.8
max,201504.0,17.3


Outliers:


In [11]:
# Source: https://data.gov.ie/dataset/nqq48-gross-domestic-product-and-gross-national-product?package_type=dataset
gdp_df = pd.read_csv("https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/NQQ48/CSV/1.0/en", header=[0])
descriptive_stats(gdp_df)

Shape:  (2616, 8)


Unnamed: 0,STATISTIC,Statistic Label,TLIST(Q1),Quarter,C02196V02652,State,UNIT,VALUE
0,NQQ48C01,GVA at Constant Basic Prices (chain linked ann...,19951,1995Q1,-,State,Euro Million,21375
2615,NQQ48S12,GNP at Current Market Prices (Seasonally Adjus...,20221,2022Q1,-,State,Euro Million,81936


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2616 entries, 0 to 2615
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   STATISTIC        2616 non-null   object
 1   Statistic Label  2616 non-null   object
 2   TLIST(Q1)        2616 non-null   int64 
 3   Quarter          2616 non-null   object
 4   C02196V02652     2616 non-null   object
 5   State            2616 non-null   object
 6   UNIT             2616 non-null   object
 7   VALUE            2616 non-null   int64 
dtypes: int64(2), object(6)
memory usage: 163.6+ KB


None

Null Values:


STATISTIC          0
Statistic Label    0
TLIST(Q1)          0
Quarter            0
C02196V02652       0
State              0
UNIT               0
VALUE              0
dtype: int64

Duplicate Values:


0

Unnamed: 0,TLIST(Q1),VALUE
count,2616.0,2616.0
mean,20083.770642,22588.679281
std,78.670089,29127.732567
min,19951.0,-35085.0
25%,20014.0,-315.0
50%,20083.0,9677.5
75%,20152.0,42409.5
max,20221.0,116812.0


Outliers:


In [12]:
# Source: https://data.gov.ie/dataset/fim09-financial-interest-rates-historical-series?package_type=dataset
fin_interest_rates = pd.read_csv('https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FIM09/CSV/1.0/en', header=[0])
descriptive_stats(fin_interest_rates)

Shape:  (1197, 8)


Unnamed: 0,STATISTIC,Statistic Label,C02567V03112,Interest Rate,TLIST(M1),Month,UNIT,VALUE
0,FIM09,Financial Interest Rates (Historical Series),1,Central Bank rediscount rate,197501,1975M01,%,11.0
1196,FIM09,Financial Interest Rates (Historical Series),4,Representative Building Societies mortgage rate,200803,2008M03,%,5.46


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1197 entries, 0 to 1196
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   STATISTIC        1197 non-null   object 
 1   Statistic Label  1197 non-null   object 
 2   C02567V03112     1197 non-null   int64  
 3   Interest Rate    1197 non-null   object 
 4   TLIST(M1)        1197 non-null   int64  
 5   Month            1197 non-null   object 
 6   UNIT             1197 non-null   object 
 7   VALUE            809 non-null    float64
dtypes: float64(1), int64(2), object(5)
memory usage: 74.9+ KB


None

Null Values:


STATISTIC            0
Statistic Label      0
C02567V03112         0
Interest Rate        0
TLIST(M1)            0
Month                0
UNIT                 0
VALUE              388
dtype: int64

Duplicate Values:


0

Unnamed: 0,C02567V03112,TLIST(M1),VALUE
count,1197.0,1197.0,809.0
mean,2.666667,199119.24812,10.412237
std,1.24774,960.251898,3.550392
min,1.0,197501.0,3.47
25%,1.0,198304.0,7.75
50%,3.0,199108.0,10.8
75%,4.0,199912.0,13.0
max,4.0,200803.0,19.19


Outliers:


In [13]:
# Source: https://data.gov.ie/dataset/cbm02-retail-interest-rates-households/resource/ebbf533d-c446-4320-9945-87c8d7900f6c
retail_interest_rates = pd.read_csv('https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/CBM02/CSV/1.0/en', header=[0])
descriptive_stats(retail_interest_rates)

Shape:  (482, 8)


Unnamed: 0,STATISTIC,Statistic Label,TLIST(M1),Month,C02196V02652,State,UNIT,VALUE
0,CBM02C1,Variable interest rate and up to one year fixa...,200301,2003M01,-,State,%,4.08
481,CBM02C2,Interest rate on consumer loans outstanding,202301,2023M01,-,State,%,6.89


Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   STATISTIC        482 non-null    object 
 1   Statistic Label  482 non-null    object 
 2   TLIST(M1)        482 non-null    int64  
 3   Month            482 non-null    object 
 4   C02196V02652     482 non-null    object 
 5   State            482 non-null    object 
 6   UNIT             482 non-null    object 
 7   VALUE            482 non-null    float64
dtypes: float64(1), int64(1), object(6)
memory usage: 30.2+ KB


None

Null Values:


STATISTIC          0
Statistic Label    0
TLIST(M1)          0
Month              0
C02196V02652       0
State              0
UNIT               0
VALUE              0
dtype: int64

Duplicate Values:


0

Unnamed: 0,TLIST(M1),VALUE
count,482.0,482.0
mean,201260.834025,4.994896
std,579.946637,1.754759
min,200301.0,2.57
25%,200801.0,3.23
50%,201301.0,5.06
75%,201801.0,6.525
max,202301.0,8.23


Outliers:
