In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import tensorflow as tf

In [2]:
# Load data
file_path = "usa_co2_temp_1800_2013.csv"
predict_temp_df = pd.read_csv(file_path)
predict_temp_df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
0,United States,1800,0.253,0.042,,,0.253,0.042,,,...,,,,,,6000000,,,12.377868,3.205628
1,United States,1801,0.267,0.044,,,0.267,0.044,,,...,,,,,,6113782,,,12.681863,2.895417
2,United States,1802,0.289,0.046,,,0.289,0.046,,,...,,,,,,6229723,,,12.859703,3.065703
3,United States,1803,0.297,0.047,,,0.297,0.047,,,...,,,,,,6347862,,,12.692603,3.065032
4,United States,1804,0.333,0.052,,,0.333,0.052,,,...,,,,,,6468241,,,12.576432,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,United States,2009,5478.210,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,1.489429e+13,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.800,309011469,1.527062e+13,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.820,311584051,1.550930e+13,1.650,11.811093,0.232825
212,United States,2012,5338.698,17.000,35.270,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.970,246.42,0.785,314043885,1.584964e+13,1.567,12.761842,0.269340


In [3]:
predict_temp_df.tail()

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
209,United States,2009,5478.21,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,14894290000000.0,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.8,309011469,15270620000000.0,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.82,311584051,15509300000000.0,1.65,11.811093,0.232825
212,United States,2012,5338.698,17.0,35.27,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.97,246.42,0.785,314043885,15849640000000.0,1.567,12.761842,0.26934
213,United States,2013,5474.257,17.302,36.369,0.115,1740.833,5.502,54.647,1427.506,...,0.34,619.02,1.956,247.39,0.782,316400539,16108430000000.0,1.589,13.09842,0.338102


In [4]:
predict_temp_df.nunique()

country                          1
year                           214
co2                            213
co2_per_capita                 209
cement_co2                     102
cement_co2_per_capita           56
coal_co2                       213
coal_co2_per_capita            208
flaring_co2                     64
gas_co2                        131
gas_co2_per_capita             131
oil_co2                        154
oil_co2_per_capita             150
co2_growth_prct                207
co2_per_gdp                    181
methane                         24
methane_per_capita              24
nitrous_oxide                   24
nitrous_oxide_per_capita        23
population                     214
gdp                            194
energy_per_gdp                  49
Avg Temperature                214
Avg Temperature Uncertainty    214
dtype: int64

In [5]:
# Removing the NaN values by replacing the NaN values with the mean average. 
predict_temp_df['flaring_co2'].fillna(predict_temp_df['flaring_co2'].mean(), inplace = True)
predict_temp_df['cement_co2'].fillna(predict_temp_df['cement_co2'].mean(), inplace = True)
predict_temp_df['cement_co2_per_capita'].fillna(predict_temp_df['cement_co2_per_capita'].mean(), inplace = True)
predict_temp_df['gas_co2'].fillna(predict_temp_df['gas_co2'].mean(), inplace = True)
predict_temp_df['gas_co2_per_capita'].fillna(predict_temp_df['gas_co2_per_capita'].mean(), inplace = True)
predict_temp_df['oil_co2'].fillna(predict_temp_df['oil_co2'].mean(), inplace = True)
predict_temp_df['oil_co2_per_capita'].fillna(predict_temp_df['oil_co2_per_capita'].mean(), inplace = True)
predict_temp_df['co2_per_gdp'].fillna(predict_temp_df['co2_per_gdp'].mean(), inplace = True)
predict_temp_df['methane'].fillna(predict_temp_df['methane'].mean(), inplace = True)
predict_temp_df['methane_per_capita'].fillna(predict_temp_df['methane_per_capita'].mean(), inplace = True)
predict_temp_df['nitrous_oxide'].fillna(predict_temp_df['nitrous_oxide'].mean(), inplace = True)
predict_temp_df['nitrous_oxide_per_capita'].fillna(predict_temp_df['nitrous_oxide_per_capita'].mean(), inplace = True)
predict_temp_df['gdp'].fillna(predict_temp_df['gdp'].mean(), inplace = True)
predict_temp_df['energy_per_gdp'].fillna(predict_temp_df['energy_per_gdp'].mean(), inplace = True)
predict_temp_df['co2_growth_prct'].fillna(predict_temp_df['co2_growth_prct'].mean(), inplace = True)

predict_temp_df.isnull().sum()

country                        0
year                           0
co2                            0
co2_per_capita                 0
cement_co2                     0
cement_co2_per_capita          0
coal_co2                       0
coal_co2_per_capita            0
flaring_co2                    0
gas_co2                        0
gas_co2_per_capita             0
oil_co2                        0
oil_co2_per_capita             0
co2_growth_prct                0
co2_per_gdp                    0
methane                        0
methane_per_capita             0
nitrous_oxide                  0
nitrous_oxide_per_capita       0
population                     0
gdp                            0
energy_per_gdp                 0
Avg Temperature                0
Avg Temperature Uncertainty    0
dtype: int64

In [6]:
predict_temp_df.dtypes

country                         object
year                             int64
co2                            float64
co2_per_capita                 float64
cement_co2                     float64
cement_co2_per_capita          float64
coal_co2                       float64
coal_co2_per_capita            float64
flaring_co2                    float64
gas_co2                        float64
gas_co2_per_capita             float64
oil_co2                        float64
oil_co2_per_capita             float64
co2_growth_prct                float64
co2_per_gdp                    float64
methane                        float64
methane_per_capita             float64
nitrous_oxide                  float64
nitrous_oxide_per_capita       float64
population                       int64
gdp                            float64
energy_per_gdp                 float64
Avg Temperature                float64
Avg Temperature Uncertainty    float64
dtype: object

In [7]:
# Drop the non-beneficial ID columns
predict_temp_df = predict_temp_df.drop(columns=["country", "year"], axis=1)

In [8]:
# Renaming the DataFrame to help fit column name in the Y and X features data
predict_temp_df.rename(columns={'Avg Temperature': 'AvgTemperature'}, inplace=True)

In [18]:
predict_temp_df['AvgTemperature'] = predict_temp_df['AvgTemperature'].astype('int64')

In [19]:
# Remove Avg Temperature outcome target from features data
y = predict_temp_df.AvgTemperature
X = predict_temp_df.drop(columns="AvgTemperature")

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
predict_temp_df

Unnamed: 0,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,gas_co2_per_capita,oil_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,AvgTemperature,Avg Temperature Uncertainty
0,0.253,0.042,18.398612,0.085731,0.253,0.042,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6000000,3.055051e+12,2.602592,12,3.205628
1,0.267,0.044,18.398612,0.085731,0.267,0.044,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6113782,3.055051e+12,2.602592,12,2.895417
2,0.289,0.046,18.398612,0.085731,0.289,0.046,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6229723,3.055051e+12,2.602592,12,3.065703
3,0.297,0.047,18.398612,0.085731,0.297,0.047,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6347862,3.055051e+12,2.602592,12,3.065032
4,0.333,0.052,18.398612,0.085731,0.333,0.052,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6468241,3.055051e+12,2.602592,12,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,5478.210,17.885,29.615000,0.097000,1905.943,6.222,39.071000,1245.742000,4.067000,2233.033000,...,0.368000,649.8900,2.1220,241.810000,0.789000,306307565,1.489429e+13,1.676000,11,0.220114
210,5675.786,18.368,31.449000,0.102000,2013.304,6.515,41.343000,1308.832000,4.236000,2250.504000,...,0.372000,650.3800,2.1050,247.170000,0.800000,309011469,1.527062e+13,1.688000,11,0.211351
211,5540.173,17.781,32.208000,0.103000,1903.555,6.109,45.542000,1327.528000,4.261000,2201.747000,...,0.357000,628.7300,2.0180,255.550000,0.820000,311584051,1.550930e+13,1.650000,11,0.232825
212,5338.698,17.000,35.270000,0.112000,1684.037,5.362,48.883000,1388.255000,4.421000,2152.124000,...,0.337000,618.7100,1.9700,246.420000,0.785000,314043885,1.584964e+13,1.567000,12,0.269340


In [20]:
# Preprocess numerical data for neural network

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.278


In [24]:
# Define the basic neural network model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=8))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100


ValueError: in user code:

    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Applications/anaconda3/envs/PythonData/lib/python3.7/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 8), found shape=(32, 21)
