In [41]:
%%writefile streamlit_pro.py
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import plotly.graph_objects as go

# Page-level configuration: full-width layout and the browser tab title.
st.set_page_config(layout='wide', page_title='dashboard')

st.title('Connecticut real estate market')
st.header('A 20 year progression')


@st.cache_data
def load_sales(path):
    """Read the cleaned sales CSV at ``path`` into a DataFrame.

    Streamlit re-executes this script on every rerun; caching the parsed
    frame avoids re-reading the file each time. The cache is keyed on
    ``path`` only, so clear it (or restart the app) if the file changes.
    """
    return pd.read_csv(path)


tab1, tab2, tab3 = st.tabs(['overview', 'in depth', 'conclusion'])

df = load_sales('cleaned_final_data.csv')

# Summary tables and a preview shown on the overview tab.
num = df.describe()               # numeric columns
cat = df.describe(include='O')    # object (string) columns
dataf = df.head(5)                # first five rows as a preview

def _render_pie(counts):
    """Draw a pie chart from a ``{label: count}`` mapping in the current container.

    Keeping each label next to its count guarantees the two sequences handed
    to Plotly always have the same length.
    """
    pie = go.Pie(labels=list(counts), values=list(counts.values()))
    st.plotly_chart(go.Figure(data=[pie]), use_container_width=True)


with tab1:
    st.subheader('Connecticut is a northern state on the east coast of the United States. It’s famous for the diversity in scenery from coastline to woodlands.')
    st.subheader('This analysis is based on data gathered from 170 towns within the state of Connecticut between 1999 and 2021.', divider='blue')
    st.dataframe(dataf)
    st.subheader('It features 6 main property types [residential, commercial, industrial, vacant land, apartment & public utility]. Furthermore, it makes the distinction between 5 types of residential housing [single family, two family, three family, four family and condo].')

    # Hard-coded sale counts per main property type.
    property_type_counts = {
        'Residential': 608849,
        'Vacant Land': 3162,
        'Commercial': 1981,
        'Apartments': 485,
        'Industrial': 228,
        # The original list had six labels but only five values, leaving this
        # slice without a value. 5 matches the 'Utility' count in the breakdown
        # chart below -- TODO confirm against the dataset.
        'Public Utility ': 5,
    }

    # Hard-coded sale counts for the finer-grained breakdown.
    detailed_type_counts = {
        'Single Family ': 445460,
        # The original repeated the Single Family count (445460) here.
        # 117285 is the value that makes the five residential sub-types
        # (single/condo/two/three/four family) add up to the 608849
        # 'Residential' total above -- TODO confirm against the dataset.
        'Condo': 117285,
        'Two Family': 29608,
        'Three Family ': 14078,
        'Land': 3162,
        'Four Family': 2418,
        'Office': 1981,
        'Factory': 228,
        'Utility': 5,
    }

    # Charts go in the outer columns; the middle column is left empty as a spacer.
    left, _, right = st.columns(3)
    with left:
        _render_pie(property_type_counts)
    with right:
        _render_pie(detailed_type_counts)

    st.divider()

    st.subheader('Over the past 20 years the real estate market has undergone significant changes and fluctuations. The average price for a residential property has increased from \\$95,000 in 1999 to \\$500,428 in 2021. This has been attributed to an influx of wealthy buyers from nearby real estate hubs such as the state New York.', divider='blue')

    left, _, right = st.columns(3)
    with left:
        st.subheader('numerical descriptive statistics')
        st.dataframe(num)

    with right:
        st.subheader('categorical descriptive statistics')
        st.dataframe(cat)
    
# The single sale amount that the "after outlier handling" step removes.
OUTLIER_SALE_AMOUNT = 5000000000

with tab2:
    # (subheader or None, grouping column, aggregation of sale_amount)
    grouped_sales_charts = [
        ('Average sales per town', 'town', 'avg'),
        ('Sum of sales per town', 'town', 'sum'),
        ('Average sales per property type', 'property_type', 'avg'),
        # Shown directly under the property-type chart, without its own heading.
        (None, 'residential_type', 'avg'),
    ]
    for heading, group_col, agg in grouped_sales_charts:
        if heading is not None:
            st.subheader(heading)
        # Columns are referenced by name; Plotly Express resolves them
        # against the data frame passed as the first argument.
        fig = px.histogram(df, y=group_col, x='sale_amount',
                           histfunc=agg, text_auto=True)
        # Order the bars by their aggregated value.
        fig.update_yaxes(categoryorder='total ascending')
        st.plotly_chart(fig, use_container_width=True)

    st.subheader('box plot before outlier handling')
    # A single column name for y keeps the axis titled 'sale_amount';
    # a one-element list would switch to wide-form 'value'/'variable' labels.
    fig = px.box(df, x='property_type', y='sale_amount')
    st.plotly_chart(fig, use_container_width=True)

    st.subheader('box plot after outlier handling')
    # `df` is deliberately rebound at module level: the code for the
    # 'conclusion' tab that follows reads `df` and therefore also works on
    # the data without the outlier row(s).
    df = df[df['sale_amount'] != OUTLIER_SALE_AMOUNT]
    fig = px.box(df, x='property_type', y='sale_amount')
    st.plotly_chart(fig, use_container_width=True)
        
        
   
        
        
def _profit_ranking(frame, year):
    """Return per-town total ``sale_difference`` for ``year``, highest first.

    The result has the columns ``town`` and ``sale_difference``; ``.head(n)``
    gives the towns with the highest totals and ``.tail(n)`` the lowest.
    """
    sales_in_year = frame[frame['Year'] == year]
    totals = sales_in_year.groupby('town')['sale_difference'].sum()
    return totals.sort_values(ascending=False).reset_index()


def _render_profit_chart(heading, ranking):
    """Render ``heading`` and a per-town bar chart of ``ranking`` in the current container."""
    st.subheader(heading)
    fig = px.histogram(ranking, x='town', y='sale_difference',
                       barmode='group', text_auto=True, histfunc='sum')
    st.plotly_chart(fig, use_container_width=True)


with tab3:
    st.subheader('Real estate evaluation through 1999-2021')
    graph = df.groupby('Year')['sale_difference'].mean().reset_index()
    fig = px.line(graph, x='Year', y='sale_difference')
    st.plotly_chart(fig, use_container_width=True)

    st.subheader('Sum of sales through 1999-2021')
    yearly = df.groupby('Year')['sale_amount'].sum().reset_index()
    # The label mapping must be keyed by the actual column name
    # ('sale_amount'); a key that matches no column is silently ignored.
    fig = px.bar(yearly, x='Year', y='sale_amount',
                 labels={'Year': 'Year', 'sale_amount': 'Sum of Sales'})
    st.plotly_chart(fig, use_container_width=True)

    st.subheader('Yearly number of properties sold in 1999-2021')
    # Counts the non-null date_recorded entries per year.
    yearly_1 = df.groupby('Year')['date_recorded'].count().reset_index()
    fig = px.line(yearly_1, x='Year', y='date_recorded')
    st.plotly_chart(fig, use_container_width=True)

    st.subheader('Avg of sales amount in 1999-2021')
    new = df.groupby('Year')['sale_amount'].mean().reset_index()
    fig = px.line(new, x='Year', y='sale_amount')
    st.plotly_chart(fig, use_container_width=True)

    # Each year's ranking is computed once and reused for both the
    # highest (head) and lowest (tail) charts.
    ranking_2020 = _profit_ranking(df, 2020)
    ranking_2021 = _profit_ranking(df, 2021)

    # First row: highest totals. Outer columns hold the charts; the middle
    # column is an empty spacer.
    left, _, right = st.columns(3)
    with left:
        _render_profit_chart('Top 10 Towns with the highest profit in 2020',
                             ranking_2020.head(10))
    with right:
        _render_profit_chart('Top 10 Towns with the highest profit in 2021',
                             ranking_2021.head(10))
    st.subheader("Common towns in the top 10 towns with the highest profit are Norwalk, Stamford, Darien, Danbury, Ridgefield")

    # Second row: lowest totals (the last ten rows of the descending ranking).
    left, _, right = st.columns(3)
    with left:
        _render_profit_chart('Top 10 Towns with the lowest profit in 2020',
                             ranking_2020.tail(10))
    with right:
        _render_profit_chart('Top 10 Towns with the lowest profit in 2021',
                             ranking_2021.tail(10))
    st.subheader("Common towns in the top 10 towns with the lowest profit are Bozrah, Chaplin, Hampton, Scotland, Union, Preston")
      

Overwriting streamlit_pro.py


In [42]:
! streamlit run streamlit_pro.py

^C
