# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
data = pd.read_csv('D:\MLprojects\gold_price.csv')

In [3]:
data.head()

Unnamed: 0,Date,USD (AM),USD (PM),GBP (AM),GBP (PM),EURO (AM),EURO (PM)
0,2001-01-02,272.8,271.1,183.026,181.617,288.677,287.334
1,2001-01-03,269.0,267.15,178.916,177.39,281.823,281.655
2,2001-01-04,268.75,267.1,178.869,178.352,282.538,282.049
3,2001-01-05,268.0,267.4,178.488,178.148,280.775,280.882
4,2001-01-08,268.6,268.3,178.769,178.664,282.41,282.481


# Initial Data Exploration 

In [4]:
%matplotlib inline

In [5]:
data.columns

Index(['Date', 'USD (AM)', 'USD (PM)', 'GBP (AM)', 'GBP (PM)', 'EURO (AM)',
       'EURO (PM)'],
      dtype='object')

In [6]:
data.dtypes

Date          object
USD (AM)     float64
USD (PM)     float64
GBP (AM)     float64
GBP (PM)     float64
EURO (AM)    float64
EURO (PM)    float64
dtype: object

In [7]:
data.describe()

Unnamed: 0,USD (AM),USD (PM),GBP (AM),GBP (PM),EURO (AM),EURO (PM)
count,4718.0,4682.0,4718.0,4682.0,4718.0,4682.0
mean,959.990812,959.728684,625.068432,624.921846,773.892791,773.843716
std,449.456217,449.487106,324.491391,324.544149,351.555806,351.696708
min,256.7,255.95,176.572,176.441,276.711,277.667
25%,449.1125,448.6125,244.57675,244.4635,361.3935,361.33875
50%,1113.125,1112.275,717.145,716.2105,874.3545,875.1415
75%,1293.75,1294.225,939.0655,939.2665,1085.96775,1085.81625
max,1896.5,1895.0,1265.9,1265.01,1389.89,1392.03


In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4718 entries, 0 to 4717
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       4718 non-null   object 
 1   USD (AM)   4718 non-null   float64
 2   USD (PM)   4682 non-null   float64
 3   GBP (AM)   4718 non-null   float64
 4   GBP (PM)   4682 non-null   float64
 5   EURO (AM)  4718 non-null   float64
 6   EURO (PM)  4682 non-null   float64
dtypes: float64(6), object(1)
memory usage: 258.1+ KB


In [9]:
data.duplicated()

0       False
1       False
2       False
3       False
4       False
        ...  
4713    False
4714    False
4715    False
4716    False
4717    False
Length: 4718, dtype: bool

In [10]:
data.drop_duplicates

<bound method DataFrame.drop_duplicates of             Date  USD (AM)  USD (PM)  GBP (AM)  GBP (PM)  EURO (AM)  EURO (PM)
0     2001-01-02    272.80    271.10   183.026   181.617    288.677    287.334
1     2001-01-03    269.00    267.15   178.916   177.390    281.823    281.655
2     2001-01-04    268.75    267.10   178.869   178.352    282.538    282.049
3     2001-01-05    268.00    267.40   178.488   178.148    280.775    280.882
4     2001-01-08    268.60    268.30   178.769   178.664    282.410    282.481
...          ...       ...       ...       ...       ...        ...        ...
4713  2019-08-27   1531.85   1532.95  1250.910  1247.510   1378.970   1380.880
4714  2019-08-28   1541.75   1537.15  1263.310  1258.770   1389.890   1387.430
4715  2019-08-29   1536.65   1540.20  1260.510  1262.960   1387.290   1392.030
4716  2019-08-30   1526.55   1528.40  1253.140  1251.150   1382.750   1383.510
4717  2019-09-02   1523.35   1525.95  1260.420  1265.010   1388.690   1391.510

[4718 ro

In [11]:
data.duplicated().sum()

0

In [12]:
data.isnull().sum()

Date          0
USD (AM)      0
USD (PM)     36
GBP (AM)      0
GBP (PM)     36
EURO (AM)     0
EURO (PM)    36
dtype: int64

In [13]:
len(data)

4718

# Task 2 : Create Train & Test Sets

In [14]:
train_data = data[:3000]
train_data

Unnamed: 0,Date,USD (AM),USD (PM),GBP (AM),GBP (PM),EURO (AM),EURO (PM)
0,2001-01-02,272.80,271.10,183.026,181.617,288.677,287.334
1,2001-01-03,269.00,267.15,178.916,177.390,281.823,281.655
2,2001-01-04,268.75,267.10,178.869,178.352,282.538,282.049
3,2001-01-05,268.00,267.40,178.488,178.148,280.775,280.882
4,2001-01-08,268.60,268.30,178.769,178.664,282.410,282.481
...,...,...,...,...,...,...,...
2995,2012-11-07,1730.50,1715.25,1080.752,1073.037,1345.855,1344.345
2996,2012-11-08,1715.00,1717.00,1075.842,1074.401,1347.423,1347.301
2997,2012-11-09,1732.75,1738.25,1085.547,1091.180,1362.225,1366.549
2998,2012-11-12,1735.75,1735.25,1091.392,1093.071,1365.442,1364.405


In [15]:
test_data = data[3000:]
test_data

Unnamed: 0,Date,USD (AM),USD (PM),GBP (AM),GBP (PM),EURO (AM),EURO (PM)
3000,2012-11-14,1724.50,1725.75,1085.205,1087.155,1353.080,1354.486
3001,2012-11-15,1723.50,1710.00,1087.656,1078.048,1351.447,1337.191
3002,2012-11-16,1710.00,1713.50,1077.912,1080.255,1342.756,1346.562
3003,2012-11-19,1723.25,1730.50,1083.669,1087.954,1349.663,1353.433
3004,2012-11-20,1734.00,1732.25,1089.059,1088.302,1354.053,1353.215
...,...,...,...,...,...,...,...
4713,2019-08-27,1531.85,1532.95,1250.910,1247.510,1378.970,1380.880
4714,2019-08-28,1541.75,1537.15,1263.310,1258.770,1389.890,1387.430
4715,2019-08-29,1536.65,1540.20,1260.510,1262.960,1387.290,1392.030
4716,2019-08-30,1526.55,1528.40,1253.140,1251.150,1382.750,1383.510


In [16]:
X_train, X_test, y_train, y_test = train_test_split(data.drop('USD (AM)' , axis =1),data['USD (AM)'] ,test_size=0.2,random_state=0)
X_train.shape , X_test.shape

((3774, 6), (944, 6))

In [17]:
X_train.head()

Unnamed: 0,Date,USD (PM),GBP (AM),GBP (PM),EURO (AM),EURO (PM)
1465,2006-10-18,594.0,318.333,317.97,474.681,474.668
1377,2006-06-15,569.5,311.027,307.838,454.96,450.732
34,2001-02-19,259.45,178.279,179.055,281.131,281.613
286,2002-02-18,297.35,208.159,208.155,341.924,341.428
465,2002-11-04,317.55,203.725,204.409,318.794,319.403
