In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
import h5py


import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the Excel file

file_path = 'E:/Python Programs/EuroAstro/total_combinations.xlsx'
data = pd.read_excel(file_path, sheet_name=None)  # Load all sheets into a dictionary

In [3]:
type(data)

dict

In [4]:
data["Sheet1"]

Unnamed: 0,1st Num,2nd Num,3rd Num,4th Num,5th Num,Unnamed: 5
0,1,2,3,4,5,1 2 3 4 5
1,1,2,3,4,6,1 2 3 4 6
2,1,2,3,4,7,1 2 3 4 7
3,1,2,3,4,8,1 2 3 4 8
4,1,2,3,4,9,1 2 3 4 9
...,...,...,...,...,...,...
211871,1,46,47,48,49,1 46 47 48 49
211872,1,46,47,48,50,1 46 47 48 50
211873,1,46,47,49,50,1 46 47 49 50
211874,1,46,48,49,50,1 46 48 49 50


In [5]:
# Combine data from all sheets into a single DataFrame

all_data = pd.concat(data.values())

In [6]:
all_data

Unnamed: 0,1st Num,2nd Num,3rd Num,4th Num,5th Num,Unnamed: 5
0,1,2,3,4,5,1 2 3 4 5
1,1,2,3,4,6,1 2 3 4 6
2,1,2,3,4,7,1 2 3 4 7
3,1,2,3,4,8,1 2 3 4 8
4,1,2,3,4,9,1 2 3 4 9
...,...,...,...,...,...,...
211871,45,46,47,48,50,45 46 47 48 50
211872,45,46,47,49,50,45 46 47 49 50
211873,45,46,48,49,50,45 46 48 49 50
211874,45,47,48,49,50,45 47 48 49 50


In [7]:
all_data.shape

(2118760, 6)

In [8]:
all_data.reset_index(inplace=True)

In [9]:
all_data.shape

(2118760, 7)

In [10]:
all_data

Unnamed: 0,index,1st Num,2nd Num,3rd Num,4th Num,5th Num,Unnamed: 5
0,0,1,2,3,4,5,1 2 3 4 5
1,1,1,2,3,4,6,1 2 3 4 6
2,2,1,2,3,4,7,1 2 3 4 7
3,3,1,2,3,4,8,1 2 3 4 8
4,4,1,2,3,4,9,1 2 3 4 9
...,...,...,...,...,...,...,...
2118755,211871,45,46,47,48,50,45 46 47 48 50
2118756,211872,45,46,47,49,50,45 46 47 49 50
2118757,211873,45,46,48,49,50,45 46 48 49 50
2118758,211874,45,47,48,49,50,45 47 48 49 50


In [11]:
all_data.drop(labels=['index','Unnamed: 5'], axis=1, inplace=True)

In [12]:
all_data

Unnamed: 0,1st Num,2nd Num,3rd Num,4th Num,5th Num
0,1,2,3,4,5
1,1,2,3,4,6
2,1,2,3,4,7
3,1,2,3,4,8
4,1,2,3,4,9
...,...,...,...,...,...
2118755,45,46,47,48,50
2118756,45,46,47,49,50
2118757,45,46,48,49,50
2118758,45,47,48,49,50


In [13]:
all_data.shape

(2118760, 5)

In [14]:
# Convert DataFrame to numpy array for easier manipulation

combinations = all_data.values

In [15]:
combinations

array([[ 1,  2,  3,  4,  5],
       [ 1,  2,  3,  4,  6],
       [ 1,  2,  3,  4,  7],
       ...,
       [45, 46, 48, 49, 50],
       [45, 47, 48, 49, 50],
       [46, 47, 48, 49, 50]], dtype=int64)

In [16]:
# Normalize the data (optional, depending on the model's needs)

combinations = combinations / 50.0  # Since the numbers range from 1 to 50

In [17]:
combinations

array([[0.02, 0.04, 0.06, 0.08, 0.1 ],
       [0.02, 0.04, 0.06, 0.08, 0.12],
       [0.02, 0.04, 0.06, 0.08, 0.14],
       ...,
       [0.9 , 0.92, 0.96, 0.98, 1.  ],
       [0.9 , 0.94, 0.96, 0.98, 1.  ],
       [0.92, 0.94, 0.96, 0.98, 1.  ]])

In [18]:
prev_occurred = pd.read_csv("E:/Python Programs/EuroAstro/Previous_Combinations.csv")

In [19]:
prev_occurred

Unnamed: 0,1st Num,2nd Num,3rd Num,4th Num,5th Num,Unnamed: 5
0,3,4,27,29,37,
1,15,19,22,46,49,
2,1,4,24,34,36,
3,1,9,23,25,40,
4,12,20,21,45,48,
...,...,...,...,...,...,...
1687,15,22,35,44,48,
1688,4,8,10,16,34,
1689,4,19,23,35,37,
1690,9,25,28,37,38,


In [20]:
prev_occurred.drop(labels=['Unnamed: 5'], axis=1, inplace=True)

In [21]:
prev_occurred

Unnamed: 0,1st Num,2nd Num,3rd Num,4th Num,5th Num
0,3,4,27,29,37
1,15,19,22,46,49
2,1,4,24,34,36
3,1,9,23,25,40
4,12,20,21,45,48
...,...,...,...,...,...
1687,15,22,35,44,48
1688,4,8,10,16,34
1689,4,19,23,35,37
1690,9,25,28,37,38


In [22]:
# Convert DataFrame to numpy array for easier manipulation

Previous_combinations = prev_occurred.values

In [23]:
Previous_combinations

array([[ 3,  4, 27, 29, 37],
       [15, 19, 22, 46, 49],
       [ 1,  4, 24, 34, 36],
       ...,
       [ 4, 19, 23, 35, 37],
       [ 9, 25, 28, 37, 38],
       [ 5,  7, 12, 33, 46]], dtype=int64)

In [24]:
# Normalize the data (optional, depending on the model's needs)

Previous_combinations = Previous_combinations / 50.0  # Since the numbers range from 1 to 50

In [25]:
Previous_combinations

array([[0.06, 0.08, 0.54, 0.58, 0.74],
       [0.3 , 0.38, 0.44, 0.92, 0.98],
       [0.02, 0.08, 0.48, 0.68, 0.72],
       ...,
       [0.08, 0.38, 0.46, 0.7 , 0.74],
       [0.18, 0.5 , 0.56, 0.74, 0.76],
       [0.1 , 0.14, 0.24, 0.66, 0.92]])

In [26]:
# Ensuring data is in the correct shape

X = np.squeeze(combinations)  # Remove single-dimensional entries from combinations
y = np.squeeze(Previous_combinations)  # Remove single-dimensional entries from Previous_combinations

In [27]:
X.shape

(2118760, 5)

In [28]:
y.shape

(1692, 5)

In [29]:
# Reshaping y to be in the format (samples, time steps, features)
# LSTM expects the input shape to be (number of samples, number of time steps, number of features)
# Here, the number of features is 1, since each number in the combination can be treated as a feature.

# Reshaping y to add a dimension for features (required for LSTM)
y_reshaped = y.reshape((y.shape[0], y.shape[1], 1))

In [80]:
y_reshaped.shape

(1692, 5, 1)

In [83]:
y_reshaped

array([[[0.06],
        [0.08],
        [0.54],
        [0.58],
        [0.74]],

       [[0.3 ],
        [0.38],
        [0.44],
        [0.92],
        [0.98]],

       [[0.02],
        [0.08],
        [0.48],
        [0.68],
        [0.72]],

       ...,

       [[0.08],
        [0.38],
        [0.46],
        [0.7 ],
        [0.74]],

       [[0.18],
        [0.5 ],
        [0.56],
        [0.74],
        [0.76]],

       [[0.1 ],
        [0.14],
        [0.24],
        [0.66],
        [0.92]]])

In [57]:
# Build the LSTM model
model = Sequential()

# Add an LSTM layer with 256 units
model.add(LSTM(256, activation='relu', input_shape=(y_reshaped.shape[1], y_reshaped.shape[2])))

# Add a Dense layer with 128 units and ReLU activation
model.add(Dense(128, activation='relu'))

# Add a Dense layer with 64 units and ReLU activation
model.add(Dense(64, activation='relu'))

# Add a Dense layer with 32 units and ReLU activation
model.add(Dense(32, activation='relu'))

# Output layer with 5 units (one for each number in the combination)
model.add(Dense(5))

In [58]:
# Compile the model using Mean Squared Error as the loss function and Adam optimizer

model.compile(optimizer='adam', loss='mse')

In [59]:
model.summary()

In [60]:
# Training the model

history = model.fit(y_reshaped, y, epochs=100, verbose=1)

Epoch 1/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.1819
Epoch 2/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0536
Epoch 3/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0340
Epoch 4/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0315
Epoch 5/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0299
Epoch 6/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0236
Epoch 7/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0210
Epoch 8/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0198
Epoch 9/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0140
Epoch 10/100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0110

In [81]:
y_reshaped[-1]

array([[0.1 ],
       [0.14],
       [0.24],
       [0.66],
       [0.92]])

In [82]:
y_reshaped[-1].reshape(1, y_reshaped.shape[1], 1)

array([[[0.1 ],
        [0.14],
        [0.24],
        [0.66],
        [0.92]]])

In [73]:
# Make a prediction using the last combination in y

predicted_combination = model.predict(y_reshaped[-1].reshape(1, y_reshaped.shape[1], 1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


In [79]:
predicted_combination

array([[0.10448231, 0.14340037, 0.23803976, 0.6581539 , 0.9176097 ]],
      dtype=float32)

In [84]:
predicted_combination = np.round(predicted_combination * 50).astype(int)

In [85]:
predicted_combination

array([[ 5,  7, 12, 33, 46]])

In [86]:
# Calculate the difference between the predicted combination and each combination in X

differences = np.abs(X - predicted_combination)

In [87]:
differences

array([[ 4.98,  6.96, 11.94, 32.92, 45.9 ],
       [ 4.98,  6.96, 11.94, 32.92, 45.88],
       [ 4.98,  6.96, 11.94, 32.92, 45.86],
       ...,
       [ 4.1 ,  6.08, 11.04, 32.02, 45.  ],
       [ 4.1 ,  6.06, 11.04, 32.02, 45.  ],
       [ 4.08,  6.06, 11.04, 32.02, 45.  ]])

In [88]:
# Calculate the sum of differences across all 5 numbers for each combination in X

sum_of_differences = np.sum(differences, axis=1)

In [89]:
sum_of_differences

array([102.7 , 102.68, 102.66, ...,  98.24,  98.22,  98.2 ])

In [90]:
# Find the index of the combination in X with the smallest difference

best_match_index = np.argmin(sum_of_differences)

In [91]:
best_match_index

2118759

In [92]:
# Get the best matching combination from X

best_match = X[best_match_index]

In [93]:
print("Best matching combination from X:", best_match)

Best matching combination from X: [0.92 0.94 0.96 0.98 1.  ]


In [95]:
X[2118759]*50

array([46., 47., 48., 49., 50.])