# Data Loading

In [1]:
import pandas as pd

# Load the CSV file into `df`, which the cells below rely on.
# Each anticipated failure prints a short hint and then re-raises, so the
# notebook stops here instead of carrying on with `df` undefined (or left over
# from an earlier run). Unexpected errors propagate with their full traceback.
try:
    df = pd.read_csv("skyscrapers.csv")
except FileNotFoundError:
    print("Error: skyscrapers.csv not found in the current folder.")
    raise
except pd.errors.EmptyDataError:
    print("Error: The file is empty.")
    raise
except pd.errors.ParserError:
    print("Error: File parsing issue.")
    raise
else:
    # Only reached when the read succeeded; kept outside the `try` body so a
    # problem while printing is not misreported as a loading error.
    print("Data loaded successfully!")
    print(df.head())


Data loaded successfully!
      id   material                      name  location.city  \
0     12      steel              The Illinois        Chicago   
1     62  composite             Chicago Spire        Chicago   
2     68  composite  Miglin-Beitler Skyneedle        Chicago   
3     98  composite    One World Trade Center  New York City   
4  14269   concrete        Central Park Tower  New York City   

   location.city_id location.country  location.country id  location.latitude  \
0              1539               US                  163           0.000000   
1              1539               US                  163          41.889889   
2              1539               US                  163          41.881618   
3              1641               US                  163          40.713112   
4              1641               US                  163          40.766361   

   location.longitude  purposes.abandoned  ...  purposes.telecommunications  \
0            0.000000        

In [5]:
# Display the DataFrame loaded above; the notebook renders a truncated preview
# (first and last rows, with the middle rows and columns elided).
# NOTE(review): this cell's execution count (5) is higher than that of the
# Streamlit cell further down (4), so the cells were run out of order —
# restart the kernel and run all cells to confirm the notebook still works.
df

Unnamed: 0,id,material,name,location.city,location.city_id,location.country,location.country id,location.latitude,location.longitude,purposes.abandoned,...,purposes.telecommunications,statistics.floors above,statistics.height,statistics.number of purposes,statistics.rank,status.current,status.completed.is completed,status.completed.year,status.started.is started,status.started.year
0,12,steel,The Illinois,Chicago,1539,US,163,0.000000,0.000000,True,...,True,528,1609.359985,4,1,vision,True,0,True,0
1,62,composite,Chicago Spire,Chicago,1539,US,163,41.889889,-87.614861,True,...,True,150,609.599976,1,2,never completed,True,0,True,2007
2,68,composite,Miglin-Beitler Skyneedle,Chicago,1539,US,163,41.881618,-87.634354,True,...,True,125,609.489990,1,3,vision,True,0,True,0
3,98,composite,One World Trade Center,New York City,1641,US,163,40.713112,-74.013351,True,...,True,94,541.299988,1,4,completed,True,2014,True,2006
4,14269,concrete,Central Park Tower,New York City,1641,US,163,40.766361,-73.980949,True,...,True,95,541.020020,3,5,under construction,True,2019,True,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2426,18633,concrete,Hallets Point 2,New York City,1641,US,163,0.000000,0.000000,True,...,True,0,0.000000,1,2423,proposed,True,0,True,0
2427,18635,concrete,Hallets Point 3,New York City,1641,US,163,0.000000,0.000000,True,...,True,0,0.000000,1,2423,proposed,True,0,True,0
2428,18636,concrete,Hallets Point 4,New York City,1641,US,163,0.000000,0.000000,True,...,True,0,0.000000,1,2423,proposed,True,0,True,0
2429,18637,concrete,Hallets Point 5,New York City,1641,US,163,0.000000,0.000000,True,...,True,0,0.000000,1,2423,proposed,True,0,True,0


**My Project Design Phase: Skyscrapers Data Exploration**

### **Queries and User-Driven Questions:**
1. **Tallest Skyscrapers by Location**  
   **Question:** What are the tallest skyscrapers in a selected country or city?  
   **User Input:** I will use a dropdown to select the country or city.  
   **Visualization:** I will create a bar chart showing skyscraper heights, labeled with names.

2. **Skyscraper Completion Timeline**  
   **Question:** How many skyscrapers were completed within a specific year range?  
   **User Input:** I will implement a slider for selecting a year range.  
   **Visualization:** I will create a line chart showing the number of skyscrapers completed per year.

3. **Skyscraper Purpose Distribution**  
   **Question:** What is the distribution of purposes for skyscrapers in a selected location?  
   **User Input:** I will provide a dropdown for selecting a country or city.  
   **Visualization:** I will create a pie chart showing proportions of different purposes.

### **Interactive Widgets:**
- **Dropdowns:** For selecting countries or cities.
- **Sliders:** For choosing year ranges.
- **Buttons:** For triggering data queries.

### **Data Presentation:**
- **Bar Charts:** To compare skyscraper heights and completion counts.
- **Line Charts:** To show trends over time.
- **Pie Charts:** To visualize purpose distribution.
- **Maps:** I will use PyDeck to plot skyscraper locations with tooltips showing names, heights, and completion years.

### **User Experience Considerations:**
- **Labels:** I will ensure clearly labeled controls and chart titles.
- **Navigation:** I will organize the layout with sections for different queries.
- **Visual Appeal:** I will apply custom color schemes, interactive maps, and tooltips for additional details.

### **Next Steps:**
- Set up the Streamlit interface.
- Load and clean the data.
- Implement the defined queries and visualizations.

I believe this design will provide a dynamic and visually engaging exploration of the Skyscrapers dataset, meeting both analytical and presentation goals.



In [4]:
import streamlit as st
import pandas as pd
import pydeck as pdk
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv("skyscrapers.csv")

# Streamlit app title
st.title("Skyscraper Data Explorer")

# Sidebar for user inputs
st.sidebar.header("Filters")

# Country filter: missing values are skipped so the dropdown has no blank entry.
country = st.sidebar.selectbox("Select a Country", df['location.country'].dropna().unique())

# Completion-year filter. The dataset stores 0 when no completion year is
# recorded (e.g. for "vision" or "proposed" entries), so 0 is excluded when
# working out the slider bounds; otherwise the slider would start at year 0.
completed_years = df.loc[df['status.completed.year'] > 0, 'status.completed.year']
if completed_years.empty:
    st.error("The dataset contains no completion years to filter on.")
    st.stop()
min_year = int(completed_years.min())
max_year = int(completed_years.max())

if min_year < max_year:
    # Clamp the preferred 2000-2020 default into the range the data actually
    # covers, so the default is always a valid slider value.
    default_start = min(max(2000, min_year), max_year)
    default_end = max(min(2020, max_year), default_start)
    year_range = st.sidebar.slider(
        "Select Completion Year Range",
        min_year,
        max_year,
        (default_start, default_end),
    )
else:
    # Only one completion year in the data: there is nothing to choose, so the
    # slider is skipped and that single year is used.
    year_range = (min_year, max_year)

# Filter data based on user input
filtered_data = df[
    (df['location.country'] == country)
    & df['status.completed.year'].between(year_range[0], year_range[1])
]

# Everything below (idxmax, mean, pie chart) assumes at least one row, so stop
# this run with a clear message when the selection is empty.
if filtered_data.empty:
    st.warning("No skyscrapers match the selected filters. Try another country or a wider year range.")
    st.stop()

# Visualization 1: Bar Chart - Tallest Skyscrapers
# Take the ten rows with the greatest height in the current selection and plot
# each height against the building's name.
bar_heading = "Tallest Skyscrapers in " + country
st.subheader(bar_heading)
top_skyscrapers = filtered_data.nlargest(n=10, columns='statistics.height')
height_by_name = top_skyscrapers.set_index('name').loc[:, 'statistics.height']
st.bar_chart(height_by_name)

# Visualization 2: Line Chart - Completion Over Time
# Count the selected rows per completion year and plot the counts as a line.
st.subheader("Skyscraper Completions Over Time")
rows_by_year = filtered_data.groupby(by='status.completed.year')
completion_count = rows_by_year.size()
st.line_chart(completion_count)

# Visualization 3: Pie Chart - Purpose Distribution
# Sums every "purposes.*" column over the current selection and shows each
# purpose's share as a pie wedge.
st.subheader("Skyscraper Purpose Distribution in " + country)
purpose_columns = [col for col in df.columns if col.startswith('purposes.')]
purpose_sums = filtered_data[purpose_columns].sum().sort_values(ascending=False)

# Purposes that never occur would only add zero-width wedges with overlapping
# labels, and an all-zero input cannot be drawn as a pie at all.
purpose_sums = purpose_sums[purpose_sums > 0]

if purpose_sums.empty:
    st.info("No purpose data available for the current selection.")
else:
    # regex=False: the "." in the prefix must be matched literally; the default
    # for `regex` differs between pandas versions.
    purpose_labels = purpose_sums.index.str.replace('purposes.', '', regex=False)
    fig, ax = plt.subplots()
    ax.pie(
        purpose_sums,
        labels=purpose_labels,
        autopct='%1.1f%%',
        startangle=140,
        colors=sns.color_palette("pastel"),
    )
    st.pyplot(fig)
    # The script is re-run on every interaction; close the figure so open
    # matplotlib figures do not accumulate.
    plt.close(fig)

# Visualization 4: Map - Skyscraper Locations
st.subheader("Skyscraper Map View")

# pydeck evaluates accessor strings such as '[lon, lat]' as expressions, where
# a dot means attribute access. The dataset's dotted column names
# ("location.longitude") therefore cannot be referenced directly, so the
# columns the map needs are copied under plain identifiers.
map_data = filtered_data.rename(columns={
    'location.longitude': 'lon',
    'location.latitude': 'lat',
    'statistics.height': 'height',
    'status.completed.year': 'completed_year',
})[['name', 'lon', 'lat', 'height', 'completed_year']]

# Rows without a known position are stored as (0, 0) in this dataset; drop
# them (and any missing values) so they neither appear on the map nor drag the
# view centre away from the real buildings.
map_data = map_data.dropna(subset=['lon', 'lat'])
map_data = map_data[(map_data['lon'] != 0) | (map_data['lat'] != 0)]

if map_data.empty:
    st.info("No location data available for the current selection.")
else:
    st.pydeck_chart(pdk.Deck(
        map_style='mapbox://styles/mapbox/light-v10',
        initial_view_state=pdk.ViewState(
            # Centre the view on the average position of the plotted buildings.
            latitude=map_data['lat'].mean(),
            longitude=map_data['lon'].mean(),
            zoom=5,
            pitch=50,
        ),
        layers=[
            pdk.Layer(
                'ScatterplotLayer',
                data=map_data,
                get_position='[lon, lat]',
                get_fill_color='[200, 30, 0, 160]',
                get_radius=50000,
                pickable=True,
            )
        ],
        # `pickable=True` only has a visible effect together with a tooltip.
        tooltip={"text": "{name}\nHeight: {height}\nCompleted: {completed_year}"},
    ))

# Summary Report
# Headline figures for the current selection. Each statistic is only written
# when there is data to compute it from, so an empty selection produces a
# message instead of an exception from idxmax().
st.subheader("Summary Report")
st.write(f"Total Skyscrapers in {country}: {filtered_data.shape[0]}")

known_heights = filtered_data['statistics.height'].dropna()
if known_heights.empty:
    st.write("No skyscrapers with a recorded height match the current filters.")
else:
    # Look the tallest row up once and reuse it for both name and height.
    tallest = filtered_data.loc[known_heights.idxmax()]
    st.write(f"Tallest Skyscraper: {tallest['name']} ({tallest['statistics.height']:.1f} meters)")

# A completion year of 0 is the dataset's placeholder for "no completion
# year", so it is left out of the oldest / most recent figures.
completed_years = filtered_data.loc[
    filtered_data['status.completed.year'] > 0, 'status.completed.year'
]
if completed_years.empty:
    st.write("No completion years are recorded for the current selection.")
else:
    st.write(f"Oldest Skyscraper Completed: {completed_years.min()}")
    st.write(f"Most Recent Skyscraper Completed: {completed_years.max()}")

st.write("Explore more using the filters on the sidebar!")
