In [1]:
!pip install tensorflow==2.6.0
!pip install Cython==0.29.24
!pip install numpy==1.19.5
!pip install matplotlib==3.2.2 
!pip install seaborn==0.11.2 
!pip install scipy==1.4.1 
!pip install scikit-learn==0.22.2.post1 
!pip install scikit-image==0.16.2 
!pip install pandas==1.1.5 
!pip install pyyaml 
!pip install imutilstqdm==4.62.3 
!pip install psutil==5.4.8 
!pip install h5py==3.1.0

Collecting tensorflow==2.6.0
  Downloading tensorflow-2.6.0-cp37-cp37m-manylinux2010_x86_64.whl (458.3 MB)
[K     |████████████████████████████████| 458.3 MB 10 kB/s 
Collecting clang~=5.0
  Downloading clang-5.0.tar.gz (30 kB)
Collecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collecting wrapt~=1.12.1
  Downloading wrapt-1.12.1.tar.gz (27 kB)
Collecting flatbuffers~=1.12.0
  Downloading flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: clang, wrapt
  Building wheel for clang (setup.py) ... [?25l[?25hdone
  Created wheel for clang: filename=clang-5.0-py3-none-any.whl size=30692 sha256=b0e8e5953a860cbaba3db42ce7590d9afd4e3b71958b0128f4344d1364e1f182
  Stored in directory: /root/.cache/pip/wheels/98/91/04/971b4c587cf47ae952b108949b46926f426c02832d120a082a
  Building wheel for wrapt (setup.py) ... [?25l[?25hdone
  Created wheel for wrapt: filename=wrapt-1.12.1-cp37-cp37m-linux_x86_64.whl size=68720 

In [2]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

%matplotlib inline

In [3]:
# Toggle for the Google Colab setup below; when False, the Drive mount and the
# directory change are skipped entirely.
onColab = True
if onColab:
  from google.colab import drive
  # Mount Google Drive under /gdrive.
  drive.mount('/gdrive')
  # Switch to the project folder so that the relative paths used later in the
  # notebook (e.g. './Training.csv') resolve against it, then list its contents.
  %cd /gdrive/MyDrive/University/ANN/CHALLENGE2
  !ls

Mounted at /gdrive
/gdrive/MyDrive/University/ANN/CHALLENGE2
analysis.ipynb			  multimodel_1
baseline			  multimodel_2
baseline_2			  multi_model_2.ipynb
baseline_2_ckpt			  multi_model.ipynb
baseline_3			  multimodel_model.ipynb
baseline_3_ckpt			  multimodel_with_baseline
baseline_3_forSub		  multimodel_with_baseline_2
baseline_3_smooth_training.ipynb  multimodel_with_baseline_3
baseline_3_training_smooth	  my_net
baseline_3_training_smooth_ckpt   my_net_ckpt
baseline_4			  my_net.ipynb
baseline_4_ckpt			  simple
baseline_5			  simple_ckpt
baseline_5_ckpt			  smooth_with_fft.ipynb
baseline_ckpt			  smooth_with_regression.ipynb
baseline.ipynb			  Training.csv
inno_training_smooth		  Training_smooth.csv
model.png


In [4]:
# Load the raw training data; the relative path is resolved against the
# current working directory.
dataset = pd.read_csv('./Training.csv')

In [5]:
def chunk(x, size, stride):
  """
  Split `x` into windows of length `size` whose start positions are `stride` apart.

  Windows are aligned to the RIGHT end of `x`: the last window always ends on the
  final element, and leading elements that do not fit a whole window (when `size`
  and `stride` are not compatible with `len(x)`) are discarded.

  Returns a list of slices of `x`, ordered from left to right.
  """
  # Start offsets are generated from the right-most window backwards, then
  # walked in reverse so the result comes out in left-to-right order.
  right_most_start = len(x) - size
  starts = range(right_most_start, -1, -stride)
  return [x[begin: begin + size] for begin in reversed(starts)]

# # test
# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 3
# stride = 2
# print(chunk(x, size, stride))
# assert chunk(x, size, stride) == [[2, 3, 4], [4, 5, 6], [6, 7, 8], [8, 9, 10]]

# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 4
# stride = 2
# print(chunk(x, size, stride))
# assert chunk(x, size, stride) == [[1, 2, 3, 4],[3, 4, 5, 6],[5, 6, 7, 8],[7, 8, 9, 10]]

# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 4
# stride = 3
# print(chunk(x, size, stride))
# assert chunk(x, size, stride) == [[1, 2, 3, 4],[4, 5, 6, 7],[7, 8, 9, 10]]

In [6]:
def rebuild(y, size, stride):
  """
  Stitch the windows produced by `chunk` back into one flat list.

  y: sequence of windows (at least one), each expected to hold `size` values.
  size, stride: the same values that were given to `chunk`.

  Consecutive windows share `size - stride` positions. On each shared position
  the value already written is replaced by the mean of itself and the incoming
  value; every other position takes the incoming value unchanged. If `chunk`
  dropped no leading elements, the result has the length of the original input.
  """
  overlap = size - stride
  head = y[0]
  total_len = len(head) * len(y) - overlap * (len(y) - 1)
  out = [0] * total_len

  # The first window is copied verbatim.
  for pos, value in enumerate(head):
    out[pos] = value

  # `cursor` is the index just past the data written so far; each later window
  # starts `overlap` positions before it.
  cursor = size
  for window in y[1:]:
    for offset, value in enumerate(window):
      target = cursor - overlap + offset
      if offset < overlap:
        # Shared with the previous window: blend old and new value.
        out[target] = (out[target] + value) / 2
      else:
        out[target] = value
    cursor += stride
  return out

# # test
# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 4
# stride = 3
# print(rebuild(chunk(x, size, stride), size, stride))
# assert rebuild(chunk(x, size, stride), size, stride) == x

# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 4
# stride = 2
# print(rebuild(chunk(x, size, stride), size, stride))
# assert rebuild(chunk(x, size, stride), size, stride) == x

In [7]:
def getRegForChunk(chunk, degree, draw=False):
  """
  Fit a polynomial regression to one chunk and return the fitted values.

  chunk: a 1D array of data; the sample positions 0..len(chunk)-1 are used as
    the regressor.
  degree: a single number (the polynomial degree) or a (min_degree, max_degree)
    pair. NOTE: no search over the range is performed; for a pair the polynomial
    is always fitted with max_degree (i.e. degree[1]).
  draw: if True, draws the chunk (scatter) and the generated regression (red line).

  Returns the model's predictions at the chunk's own sample positions.
  """
  # A bare number is used as the degree directly (previously this raised a
  # TypeError on `degree[1]`); anything indexable keeps the existing behaviour
  # of taking its second entry.
  poly_degree = degree if np.ndim(degree) == 0 else degree[1]

  X = np.arange(len(chunk)).reshape(-1, 1)

  poly = PolynomialFeatures(degree=poly_degree)
  poly_features = poly.fit_transform(X)
  poly_reg_model = linear_model.LinearRegression()
  poly_reg_model.fit(poly_features, chunk)
  y_predicted = poly_reg_model.predict(poly_features)

  if draw:
    plt.figure(figsize=(10, 6))
    plt.scatter(X, chunk)
    plt.plot(X, y_predicted, c='red')
    plt.show()

  return y_predicted

def allChunks(chunks, degree, draw=False):
  """
  Run getRegForChunk on every chunk, forwarding `degree` and `draw` unchanged.

  Returns a list with one fitted result per chunk, in the same order as `chunks`.
  """
  return [getRegForChunk(piece, degree, draw) for piece in chunks]

# # test
# x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# size = 4
# stride = 3
# chunks = chunk(x, size, stride)
# y_pred = getRegForChunk(chunks[0], (1, 5), True)

# # test
# x = [2, 4, 9, 16, 25, 36, 49]
# size = 4
# stride = 3
# chunks = chunk(x, size, stride)
# y_pred = getRegForChunk(chunks[0], (1, 5), True)

In [8]:
def getRegression(x, size, stride, degree):
  """
  Smooth `x` with chunk-wise polynomial regression.

  The series is cut into windows with `chunk`, each window is replaced by its
  fitted regression via `allChunks`, and the fitted windows are merged back with
  `rebuild`. Returns the merged result as a numpy array.
  """
  windows = chunk(x, size, stride)
  fitted_windows = allChunks(windows, degree)
  return np.array(rebuild(fitted_windows, size, stride))

In [9]:
# # Test on hype root: keep size small, otherwise 
# x = dataset['Hype root']
# size = 60
# stride = 45
# degree = (6, 15)

# r = getRegression(x, size, stride, degree)

# discarded = len(x) - np.array(r).shape[0] 
# print("we discarded ", discarded, "initial points")

# # decide which data to visualize:
# inf = 40
# sup = 400

# x_ax = np.arange(sup - inf)
# plt.figure(figsize=(10, 6))
# plt.scatter(x_ax, x[inf + discarded:sup + discarded])
# plt.plot(x_ax, r[inf:sup], c='red')
# plt.show()

In [10]:
# Parameters handed to getRegression for every column of the dataset.
# Chunk length:
size = 60
# Distance between consecutive chunk starts (neighbouring chunks overlap by size - stride):
stride = 45
# (min_degree, max_degree) pair; getRegForChunk reads only the second entry:
degree = (6, 15)

In [11]:
# Smooth every column of the dataset independently; keys are the column labels.
new = {
  column: getRegression(dataset[column], size, stride, degree)
  for column in dataset
}

In [12]:
# Assemble the smoothed columns into a DataFrame (one column per dict key).
df = pd.DataFrame(new)

In [13]:
# Plain-text preview of the smoothed frame (pandas abbreviates the printed rows/columns).
print(df)

       Sponginess  Wonder level  ...  Soap slipperiness  Hype root
0        7.335392      5.597756  ...          35.672880   4.426402
1        7.335392      5.597756  ...          35.672880   4.426402
2        7.335392      5.597756  ...          35.672880   4.426402
3        7.335392      5.597756  ...          35.672880   4.426402
4        7.335392      5.597756  ...          35.672879   4.426402
...           ...           ...  ...                ...        ...
68500    6.815676      6.866007  ...          19.735387  15.097234
68501    6.627623      6.718198  ...          19.875841  12.712619
68502    6.342047      6.510555  ...          20.046488   7.959358
68503    6.326311      6.489671  ...          20.104337   5.401417
68504    7.518556      7.300109  ...          19.844516  17.218833

[68505 rows x 7 columns]


In [14]:
# Toggle for writing the result to disk; when False nothing is saved.
save = True
if save:
  # Written relative to the current working directory, without the index column.
  df.to_csv('Training_smooth2.csv', index=False)