In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from scipy.fft import fft
from sklearn import metrics



In [None]:
# Load the dataset; the rest of the notebook reads its 'x' and 'y' columns.
df = pd.read_csv('scr-dataset.csv')
X = np.array(df['x'])
y = np.array(df['y'])

# Plot the raw data as a bar chart to inspect the relation between x and y.
# figsize is given in inches, so it is kept screen-sized: a (400, 50) canvas
# is tens of thousands of pixels wide and is slow and memory-hungry to render.
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(X, y)
ax.set(title='Raw dataset: y against x', xlabel='x', ylabel='y')
plt.show()

In [None]:
# Linear and polynomial regression on x alone gave unsatisfactory predictions
# (approximately 0.9 RMSE), making them unsuitable for the provided dataset.
#
# The data plot shows a periodic relation between x and y, so y is modelled as
# a linear term in x plus sine/cosine terms at the dominant FFT frequencies
# (Fourier series regression).

# Tunable settings for this cell.
AMPLITUDE_THRESHOLD = 0.004  # minimum |FFT| for a bin to be kept (tuned to minimise RMSE)
TEST_SIZE = 0.2
RANDOM_STATE = 42

# NOTE(review): the spectrum is computed from the full y series, i.e. before
# the train/test split, so the test targets influence which frequencies are
# selected. The test error reported below is therefore likely optimistic.
Y_fourier = fft(y)
# fftfreq needs the sample spacing; this assumes X is sorted and evenly spaced.
frequencies = np.fft.fftfreq(len(X), (X[1] - X[0]))
amplitudes = np.abs(Y_fourier)

# Select bins from the non-redundant half of the spectrum only (1 .. N//2).
# For real-valued y the upper half mirrors the lower half, and because
# sin(-t) = -sin(t) and cos(-t) = cos(t) a mirrored bin only adds exact
# (sign-flipped) copies of columns that are already present. Bin 0 is skipped
# as well: its sine column is all zeros and its cosine column is the constant
# 1, which LinearRegression already fits through its intercept.
half_spectrum = np.arange(1, len(X) // 2 + 1)
relevant_freq_indices = half_spectrum[amplitudes[half_spectrum] > AMPLITUDE_THRESHOLD]

# Build the sinusoidal features: columns 2*i and 2*i + 1 hold the sine and
# cosine of the i-th selected frequency, evaluated at every x.
selected_frequencies = frequencies[relevant_freq_indices]
angles = X[:, None] * (2 * np.pi * selected_frequencies)[None, :]
sinusoidal_features = np.empty((len(X), 2 * len(relevant_freq_indices)))
sinusoidal_features[:, 0::2] = np.sin(angles)
sinusoidal_features[:, 1::2] = np.cos(angles)

# The first column is x itself, so the model can also capture a linear trend.
X_combined = np.hstack([X.reshape(-1, 1), sinusoidal_features])

# Split data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Train a regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)


In [None]:
# Show predictions next to the actual test targets (first rows only) so they
# can be compared row by row instead of reading two separate array dumps.
comparison = pd.DataFrame({"y_test": y_test, "y_pred": y_pred})
print(comparison.head(10).to_string(index=False))

# Report the error metrics with labels so the two numbers are self-describing.
mae = metrics.mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
print(f"MAE:  {mae:.6g}")
print(f"RMSE: {rmse:.6g}")

In [None]:
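# An RMSE of ~0.0003 (versus ~0.9 for plain linear/polynomial regression) indicates
# excellent performance on the held-out split, highlighting the model's strong
# predictive capability for this dataset.
# Caveat: the Fourier frequencies were selected from the FFT of the full y series,
# which includes the test targets, so this figure is likely optimistic.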

In [None]:
# This cell reuses frequencies, relevant_freq_indices and the fitted model
# from the cells above to build the feature row for one new point, x = 50,
# and predict y for it.

# Query point
new_x = np.array([50])

# One (sin, cos) column pair per selected frequency, laid out in the same
# interleaved order as the training features
sinusoidal_features_x50 = np.zeros((1, len(relevant_freq_indices) * 2))

if len(relevant_freq_indices) == 0:
    # No frequency was selected, so no prediction is attempted
    print("Error: No relevant frequencies found, Prediction could not be made.")
else:
    # Phase of every selected frequency at the query point, computed at once
    phases = 2 * np.pi * frequencies[relevant_freq_indices] * new_x
    sinusoidal_features_x50[0, 0::2] = np.sin(phases)  # even columns: sine
    sinusoidal_features_x50[0, 1::2] = np.cos(phases)  # odd columns: cosine

    # Prepend x itself so the row matches the layout the model was trained on
    X_combined_x50 = np.hstack([new_x.reshape(-1, 1), sinusoidal_features_x50])

    # Make Prediction
    y_pred_x50 = model.predict(X_combined_x50)
    print("Predicted y for x=50:", y_pred_x50[0])