In [3]:
import pandas as pd
import plotly.express as px
import numpy as np

# Load the source table from 'iso_green.csv'. The path is relative, so it is
# resolved against the kernel's current working directory. The rest of this
# cell reads the 'Year', 'Development of environment-related technologies,
# % inventions worldwide' and 'Demand-based CO2 emissions' columns from it.
df_green = pd.read_csv('iso_green.csv')

# Filter out outliers
def filter_outliers(data, threshold=2):
    """Return the elements of ``data`` whose absolute z-score is below ``threshold``.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to screen. The result is produced by boolean-mask
        indexing into ``data``, so a Series keeps its original index labels.
    threshold : float, default 2
        Elements with ``|value - mean| / std >= threshold`` are dropped.

    Returns
    -------
    Same type as ``data``
        The surviving elements. Elements whose z-score is NaN (for example
        missing values in a Series) are dropped as well.

    Notes
    -----
    The mean and standard deviation are computed with ``np.mean`` / ``np.std``
    using their default settings.
    """
    center = np.mean(data)
    spread = np.std(data)
    deviation = np.abs(data - center)

    if spread == 0:
        # Constant data: every z-score would be 0/0 = NaN and the comparison
        # below would silently discard the whole input. With no spread there
        # are no outliers, so keep every non-missing element instead.
        return data[~np.isnan(deviation)]

    z_scores = deviation / spread
    return data[z_scores < threshold]

# Screen each indicator for outliers separately. The filtered Series keep the
# row labels of df_green, which is what allows the matching 'Year' values to be
# looked up and the pieces to be re-joined below.
filtered_technologies = filter_outliers(df_green['Development of environment-related technologies, % inventions worldwide'])
filtered_emissions = filter_outliers(df_green['Demand-based CO2 emissions'])
filtered_years = df_green.loc[filtered_technologies.index, 'Year']

# Create a new DataFrame with filtered data. The columns are aligned on the
# row index, so a row that passed only one of the two filters gets NaN in the
# other column; dropna() therefore keeps only rows that passed both filters.
filtered_df = pd.DataFrame({
    'Year': filtered_years,
    'Technologies': filtered_technologies,
    'Emissions': filtered_emissions,
}).dropna()

# Calculate averages for each year. Both means and the year labels are taken
# from ONE groupby result so every year sits on the same row as its averages.
# Do not take the years from filtered_df['Year'].unique(): unique() lists
# years in order of first appearance while groupby sorts them, so the two can
# be paired incorrectly whenever the rows are not already ordered by year.
averages_df = (
    filtered_df
    .groupby('Year', as_index=False)[['Technologies', 'Emissions']]
    .mean()
    .rename(columns={'Technologies': 'Avg Technologies', 'Emissions': 'Avg Emissions'})
)

# Plain-array views of the per-year table, in the same (year-sorted) row order
filtered_years = averages_df['Year'].values
avg_technologies = averages_df['Avg Technologies'].values
avg_emissions = averages_df['Avg Emissions'].values

# Log the avg_technologies
print("Average Technologies per Year:")
print(avg_technologies)

# Create a scatter plot
fig3d = px.scatter_3d(averages_df, x='Year', y='Avg Technologies', z='Avg Emissions')

fig3d.update_layout(
    scene=dict(
        xaxis_title='Year',
        yaxis_title='Avg Technologies',
        zaxis_title='Avg Emissions'
    ),
    title='Scatter Plot'
)
%store fig3d

fig3d.show()


Average Technologies per Year:
[1.40015385 1.0453125  1.06140625 1.40261538 1.43030769 1.11703125
 1.158125   1.18171875 1.57230769 1.57507692 1.94030303 1.56969231
 1.86769231 1.4821875  1.85615385 1.84692308 2.17212121 2.22765625
 2.3352459 ]
Stored 'fig3d' (Figure)


In [4]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import statsmodels.api as sm

# Load the source table from 'iso_green.csv' (relative path, resolved against
# the kernel's current working directory). This cell reads the file itself
# rather than reusing a DataFrame created by an earlier cell.
df_green = pd.read_csv('iso_green.csv')

# Filter out outliers
def filter_outliers(data, threshold=2):
    """Return the elements of ``data`` whose absolute z-score is below ``threshold``.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to screen. The result is produced by boolean-mask
        indexing into ``data``, so a Series keeps its original index labels.
    threshold : float, default 2
        Elements with ``|value - mean| / std >= threshold`` are dropped.

    Returns
    -------
    Same type as ``data``
        The surviving elements. Elements whose z-score is NaN (for example
        missing values in a Series) are dropped as well.

    Notes
    -----
    The mean and standard deviation are computed with ``np.mean`` / ``np.std``
    using their default settings.
    """
    center = np.mean(data)
    spread = np.std(data)
    deviation = np.abs(data - center)

    if spread == 0:
        # Constant data: every z-score would be 0/0 = NaN and the comparison
        # below would silently discard the whole input. With no spread there
        # are no outliers, so keep every non-missing element instead.
        return data[~np.isnan(deviation)]

    z_scores = deviation / spread
    return data[z_scores < threshold]

# Screen each indicator for outliers separately. The filtered Series keep the
# row labels of df_green, which is what allows the matching 'Year' values to be
# looked up and the pieces to be re-joined below.
filtered_technologies = filter_outliers(df_green['Development of environment-related technologies, % inventions worldwide'])
filtered_emissions = filter_outliers(df_green['Demand-based CO2 emissions'])
filtered_years = df_green.loc[filtered_technologies.index, 'Year']

# Create a new DataFrame with filtered data. The columns are aligned on the
# row index, so a row that passed only one of the two filters gets NaN in the
# other column; dropna() therefore keeps only rows that passed both filters.
filtered_df = pd.DataFrame({
    'Year': filtered_years,
    'Technologies': filtered_technologies,
    'Emissions': filtered_emissions,
}).dropna()

# Calculate averages for each year. Both means and the year labels are taken
# from ONE groupby result so every year sits on the same row as its averages.
# Do not take the years from filtered_df['Year'].unique(): unique() lists
# years in order of first appearance while groupby sorts them, so a point
# could be annotated with the wrong year whenever the rows are not already
# ordered by year.
averages_df = (
    filtered_df
    .groupby('Year', as_index=False)[['Technologies', 'Emissions']]
    .mean()
    .rename(columns={'Technologies': 'Avg Technologies', 'Emissions': 'Avg Emissions'})
)

# Plain-array views of the per-year table, in the same (year-sorted) row order
filtered_years = averages_df['Year'].values
avg_technologies = averages_df['Avg Technologies'].values
avg_emissions = averages_df['Avg Emissions'].values

# Ordinary least squares: avg_emissions regressed on avg_technologies
X = sm.add_constant(avg_technologies)  # design matrix with a constant term added
model = sm.OLS(endog=avg_emissions, exog=X)
results = model.fit()
# Model predictions at the same X used for fitting; drawn as the line below
reg_line = results.predict(exog=X)

# Create scatter plot with regression line
fig3dreg = go.Figure()

# Add scatter trace with year annotations
for year, tech, emiss in zip(filtered_years, avg_technologies, avg_emissions):
    fig3dreg.add_annotation(
        x=tech,
        y=emiss,
        text=str(int(year)),  # Convert year to int and convert to string
        showarrow=False,
        font=dict(size=8),
        xshift=5,
        yshift=-10
    )

# Add scatter trace
fig3dreg.add_trace(go.Scatter(
    x=avg_technologies,
    y=avg_emissions,
    mode='markers',
    marker=dict(color='rgb(31, 119, 180)'),
    name='Data'
))

# Add regression line
fig3dreg.add_trace(go.Scatter(
    x=avg_technologies,
    y=reg_line,
    mode='lines',
    line=dict(color='red'),
    name='Regression Line'
))

fig3dreg.update_layout(
    xaxis_title='Avg Technologies',
    yaxis_title='Avg Emissions',
    title='Scatter Plot with Regression Line'
)

%store fig3dreg
fig3dreg.show()


Stored 'fig3dreg' (Figure)
