<a href="https://colab.research.google.com/github/prasoon1506/Analysis-of-Attitudes-towards-Carbon-Pricing-in-India-A-Sampling-and-Hypothesis-Testing-Study/blob/main/ml/cc/exercises/linear_regression_with_synthetic_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
!pip install np_utils

In [None]:
# Fetch the MNIST digits as two (images, labels) pairs: one for training, one for testing.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Show the array shapes in the order: train images, test images, train labels, test labels.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
# Draw a 3x3 grid of randomly chosen training images, each titled with its label.
for i in range(9):
    plt.subplot(330+i+1)
    # randrange excludes the upper bound; random.randint(0, len(X_train)) could
    # return len(X_train) itself and index one past the end of the array.
    num=random.randrange(len(X_train))
    plt.imshow(X_train[num],cmap=plt.get_cmap('gray'),interpolation="none")
    plt.title("Class{}".format(y_train[num]))
plt.tight_layout()


In [None]:
# Flatten every image into a single row of float32 pixels scaled to [0, 1].
# The row count comes from the array itself instead of a hard-coded 60000/10000,
# and the ndim guard makes the cell safe to re-run: once the images are flat
# (2-D), a second run no longer divides the pixels by 255 again.
if X_train.ndim > 2:
    X_train=X_train.reshape(len(X_train),-1).astype('float32')
    X_train/=255
if X_test.ndim > 2:
    X_test=X_test.reshape(len(X_test),-1).astype('float32')
    X_test/=255
print(X_train.shape)
print(X_test.shape)

In [None]:
num_classes=10
from keras.utils import to_categorical
# One-hot encode the integer labels into num_classes columns.
# Only 1-D label vectors are encoded: re-running this cell on labels that are
# already one-hot would otherwise encode them a second time and add a dimension.
if y_train.ndim == 1:
    y_train=to_categorical(y_train,num_classes)
if y_test.ndim == 1:
    y_test=to_categorical(y_test,num_classes)

In [None]:
# Start from an empty Sequential model; the following cells append its layers.
# Re-run this cell before re-running those, otherwise the layers are added twice.
model=Sequential()

In [None]:
# First hidden block: 512 units on the 784-wide flattened input, ReLU, then 20% dropout.
for layer in (Dense(512, input_shape=(784,)), Activation('relu'), Dropout(0.2)):
    model.add(layer)


In [None]:
# Second hidden block: another 512 units, ReLU, then 20% dropout.
for layer in (Dense(512), Activation('relu'), Dropout(0.2)):
    model.add(layer)

In [None]:
# Output block: 10 units followed by softmax, giving one probability per digit class.
for layer in (Dense(10), Activation('softmax')):
    model.add(layer)

In [None]:
# Print the layer-by-layer summary of the model assembled above.
model.summary()

In [None]:
from keras.utils import plot_model
from IPython.display import Image

# Write the architecture diagram to disk, then display the saved PNG as the cell output.
plot_model(
    model,
    to_file="model_chart.png",
    show_shapes=True,
    show_layer_names=True,
)
Image("model_chart.png")

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output;
# accuracy is tracked as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs in mini-batches of 128. No validation data is passed here,
# so `history` only carries training metrics.
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    verbose=1,
)

In [None]:
# Evaluate on the held-out test set; the second entry of the result is reported
# as accuracy (the metric registered in model.compile).
score = model.evaluate(X_test, y_test)
print("Test accuracy:", score[1])

In [None]:
# Plot the per-epoch training accuracy recorded by model.fit.
fig=plt.figure()
plt.subplot(2,1,1)
plt.plot(history.history["accuracy"],color="green")
plt.title("model accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")

# Only the training curve is drawn (fit was run without validation data), so the
# legend must not list a "test" series that has no line behind it.
plt.legend(["train"],loc="lower right")

In [None]:
# Plot the per-epoch training loss recorded by model.fit.
plt.subplot(2,1,2)
plt.plot(history.history["loss"],color="red")
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
# Only the training curve is drawn (fit was run without validation data), so the
# legend must not list a "test" series that has no line behind it.
plt.legend(["train"],loc="upper right")


In [None]:
# Load the CSV test images and predict one class-probability row per image.
# NOTE(review): assumes the layout of Colab's bundled mnist_test.csv - no header
# row, the label in column 0 and the 784 pixel values in columns 1..784. With
# header=0 and usecols 0..783 the first image was consumed as a header and the
# label column was fed to the network as a pixel. Confirm against the file.
test_data=pd.read_csv(
    "/content/sample_data/mnist_test.csv",
    delimiter=",",
    header=None,
    usecols=list(range(1,785)),
)
# The network was trained on float32 pixels scaled to [0, 1]; raw 0-255 values
# would be out of the range it has seen, so apply the same scaling here.
test_data=test_data.astype("float32")/255
results = model.predict(test_data)

In [None]:
# Turn the probability rows into predicted digit labels and write a two-column
# (ImageId, Label) CSV.
results=np.argmax(results,axis=1)
results=pd.Series(results,name="Label")
# Number the rows 1..N from the actual prediction count. A hard-coded 28000 ids
# misaligns with any other test-set size and pads the Label column with NaN.
submission=pd.concat([pd.Series(range(1,len(results)+1),name="ImageId"),results],axis=1)
submission.to_csv("submission.csv",index=False)

In [None]:
# Show the submission table (ImageId, Label) that was just written to submission.csv.
print(submission)

In [None]:

# Predicted class per test image: the index of the largest output probability.
predict_x = model.predict(X_test)
classes_x = np.argmax(predict_x, axis=1)

# y_test is one-hot encoded at this point; collapse it back to integer labels.
y_test_labels = np.argmax(y_test, axis=1)

# Positions in the test set where the prediction agrees / disagrees with the label.
is_correct = classes_x == y_test_labels
correct_indices = np.nonzero(is_correct)[0]
incorrect_indices = np.nonzero(~is_correct)[0]

In [None]:
# 3x3 grid of the first nine correctly classified test digits.
plt.figure()
for slot, idx in enumerate(correct_indices[:9], start=1):
    plt.subplot(3, 3, slot)
    # Rows of X_test are flattened; restore the 28x28 image for display.
    digit = X_test[idx].reshape(28, 28)
    plt.imshow(digit, cmap="gray", interpolation="none")
    plt.title("Predicted {}, Class {}".format(classes_x[idx], y_test_labels[idx]))
plt.tight_layout()

In [None]:
# 3x3 grid of the first nine misclassified test digits.
plt.figure()
for slot, idx in enumerate(incorrect_indices[:9], start=1):
    plt.subplot(3, 3, slot)
    # Rows of X_test are flattened; restore the 28x28 image for display.
    digit = X_test[idx].reshape(28, 28)
    plt.imshow(digit, cmap="gray", interpolation="none")
    plt.title("Predicted {}, Class {}".format(classes_x[idx], y_test_labels[idx]))
plt.tight_layout()

In [None]:
# Load the Anscombe quartet; the cells below read its 'Series', 'X' and 'Y' columns.
df_2 = pd.read_json(
    '/content/sample_data/anscombe.json'
)
print(df_2)


In [None]:
# Split the Anscombe frame into its four series, compute the same statistics for
# each one, and tabulate them side by side. Every per-series name used by the
# original cell (Series1..4, x1_mean.., m1.., R2_1.. etc.) is still defined.
series_labels = ['I', 'II', 'III', 'IV']
Series1, Series2, Series3, Series4 = [df_2[df_2['Series'] == label] for label in series_labels]
quartet = [Series1, Series2, Series3, Series4]

# Mean of X (x-bar)
x1_mean, x2_mean, x3_mean, x4_mean = [s['X'].mean() for s in quartet]

# Mean of Y (y-bar)
y1_mean, y2_mean, y3_mean, y4_mean = [s['Y'].mean() for s in quartet]

# Standard deviation of X
x1_std, x2_std, x3_std, x4_std = [s['X'].std() for s in quartet]

# Standard deviation of Y
y1_std, y2_std, y3_std, y4_std = [s['Y'].std() for s in quartet]

# Pearson correlation between X and Y (off-diagonal entry of the 2x2 matrix)
correlation_x1y1, correlation_x2y2, correlation_x3y3, correlation_x4y4 = [
    np.corrcoef(s['X'], s['Y'])[0, 1] for s in quartet
]

# Linear regression slope and intercept (degree-1 polynomial fit)
(m1, c1), (m2, c2), (m3, c3), (m4, c4) = [np.polyfit(s['X'], s['Y'], 1) for s in quartet]
slopes = [m1, m2, m3, m4]
intercepts = [c1, c2, c3, c4]

# Residual sum of squares: squared distance of each Y from the fitted line
RSSY_1, RSSY_2, RSSY_3, RSSY_4 = [
    ((s['Y'] - (m * s['X'] + c)) ** 2).sum()
    for s, m, c in zip(quartet, slopes, intercepts)
]

# Total sum of squares: squared distance of each Y from its series mean
TSS_1, TSS_2, TSS_3, TSS_4 = [
    ((s['Y'] - y_bar) ** 2).sum()
    for s, y_bar in zip(quartet, [y1_mean, y2_mean, y3_mean, y4_mean])
]

# R squared (coefficient of determination)
R2_1, R2_2, R2_3, R2_4 = [
    1 - (rss / tss)
    for rss, tss in zip([RSSY_1, RSSY_2, RSSY_3, RSSY_4], [TSS_1, TSS_2, TSS_3, TSS_4])
]

# Summary table, one row per series (printed transposed: one column per series).
# R squared was previously computed and then dropped; it is now reported as well.
summary_stats = pd.DataFrame(
    {
        'Mean_x': [x1_mean, x2_mean, x3_mean, x4_mean],
        'Variance_x': [x1_std**2, x2_std**2, x3_std**2, x4_std**2],
        'Mean_y': [y1_mean, y2_mean, y3_mean, y4_mean],
        'Variance_y': [y1_std**2, y2_std**2, y3_std**2, y4_std**2],
        'Correlation': [correlation_x1y1, correlation_x2y2, correlation_x3y3, correlation_x4y4],
        'Linear Regression slope': slopes,
        'Linear Regression intercept': intercepts,
        'R squared': [R2_1, R2_2, R2_3, R2_4],
    },
    index=series_labels,
)
print(summary_stats.T)


In [None]:
# Scatter each of the four series with its fitted regression line on a 2x2 grid.
fig, axs = plt.subplots(2, 2, figsize=(18,12), dpi=500)

# (axes, panel title, data, slope, intercept) for each panel, row by row.
panels = [
    (axs[0, 0], 'Dataset I', Series1, m1, c1),
    (axs[0, 1], 'Dataset II', Series2, m2, c2),
    (axs[1, 0], 'Dataset III', Series3, m3, c3),
    (axs[1, 1], 'Dataset IV', Series4, m4, c4),
]

for ax, panel_title, data, slope, intercept in panels:
    ax.set_title(panel_title, fontsize=20)
    ax.set_xlabel('X', fontsize=13)
    ax.set_ylabel('Y', fontsize=13)
    # Green dots: the observations. Red line: the least-squares fit, labelled
    # with its equation rounded to two decimals.
    ax.plot(data['X'], data['Y'], 'go')
    equation = 'Y='+str(round(slope,2))+'x +'+str(round(intercept,2))
    ax.plot(data['X'], slope*data['X']+intercept, 'r', label=equation)
    ax.legend(loc='best', fontsize=16)

plt.show()
