In [7]:
import os
import time

import pandas as pd
from sklearn.preprocessing import StandardScaler

In [8]:
# Paths
# Ask for the directory that holds train.csv and test.csv. Surrounding
# whitespace (e.g. from a pasted path) is stripped, and an empty answer falls
# back to the current directory instead of resolving to '/train.csv' at the
# filesystem root.
data_folder = input('Data folder containing train.csv and test.csv: ').strip() or '.'
TRAIN_PATH = os.path.join(data_folder, 'train.csv')
TEST_PATH = os.path.join(data_folder, 'test.csv')



In [9]:
# Load the training table, then the test table, from the configured CSV paths
train_df = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
test_df = pd.read_csv(filepath_or_buffer=TEST_PATH)


In [10]:
# Build test labels from consecutive 'close' values:
# label at row i is 1 when close[i + 1] > close[i], else 0.
close_prices = test_df['close']
price_ratio = close_prices.div(close_prices.shift(1))  # close[i] / close[i - 1]
targets_for_test_df = (
    price_ratio.gt(1)   # rose versus the previous row? (NaN compares as False)
    .astype(int)
    .shift(-1)          # move each "next row rose" flag onto the current row
    .dropna()           # the last row has no following row, so it is dropped
)


# Majority Guessing

In [11]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Assuming 'target' is the column to predict and the rest are features
X = train_df.drop(columns=['target'])
y = train_df['target']

# Ordered 80/20 split (no shuffling): the first 4/5 of the rows form the
# training part and the remaining rows form the validation part.
split_idx = len(X) * 4 // 5
X_train, X_val = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_val = y.iloc[:split_idx], y.iloc[split_idx:]

# Most frequent label in the TRAINING part only. Taking it from the full
# train_df['target'] would let validation labels influence the baseline
# that is then scored on those same validation rows.
majority_class = y_train.value_counts().idxmax()

# Constant prediction: the majority label for every validation row
y_pred = [majority_class] * len(y_val)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy}')

# Calculate F1 macro score
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'Validation F1 Macro Score: {f1_macro}')



Validation Accuracy: 0.531152824108102
Validation F1 Macro Score: 0.3468973284345402


In [16]:
# Score the majority-class baseline on the test data against targets_for_test_df

# Test frame without the 'row_id' column; only its length is used below
X_test = test_df.drop(columns=['row_id'])

# Constant prediction: one majority_class label per test row
test_prediction = [majority_class] * len(X_test)

# Only the first len(targets_for_test_df) predictions are compared with the labels
n_scored = len(targets_for_test_df)
scored_prediction = test_prediction[:n_scored]

# Accuracy of the constant prediction
accuracy = accuracy_score(targets_for_test_df, scored_prediction)
print(f'Test Accuracy: {accuracy:.5f}')

# Macro-averaged F1 of the constant prediction
f1_macro = f1_score(targets_for_test_df, scored_prediction, average='macro')
print(f'Test F1 Macro Score: {f1_macro:.5f}')

Test Accuracy: 0.58005
Test F1 Macro Score: 0.36711


# Random Prediction

In [18]:
import numpy as np

# Random baseline: draw a 0/1 label for every test row
np.random.seed(42)  # fixed seed so the draw is repeatable
random_predictions = np.random.choice([0, 1], size=len(test_df))

# Only the first len(targets_for_test_df) predictions are compared with the labels
n_scored = len(targets_for_test_df)
scored_random = random_predictions[:n_scored]

# Accuracy of the random labels
accuracy = accuracy_score(targets_for_test_df, scored_random)
print(f'Test Accuracy (Random): {accuracy:.5f}')

# Macro-averaged F1 of the random labels
f1_macro = f1_score(targets_for_test_df, scored_random, average='macro')
print(f'Test F1 Macro Score (Random): {f1_macro:.5f}')

Test Accuracy (Random): 0.50025
Test F1 Macro Score (Random): 0.49700


In [19]:

# Assemble the submission: each test 'row_id' paired with its random prediction
submission_df = test_df[['row_id']].assign(target=random_predictions)

# Write the submission to disk without the index column
submission_df.to_csv('submission.csv', index=False)
print("Predictions saved to submission.csv")


Predictions saved to submission.csv
