In [7]:
import os

import numpy as np
import pandas as pd
from sklearn import metrics, preprocessing, linear_model
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [4]:
def main(data_dir="numerai_datasets", output_path="predictions.csv"):
    """Train a logistic-regression baseline and write tournament predictions.

    Reads ``numerai_training_data.csv`` and ``numerai_tournament_data.csv``
    from ``data_dir``, fits a ``LogisticRegression`` on every column whose
    name contains "feature" against the "target" column, and writes an
    ``id`` / ``probability`` CSV to ``output_path``.

    Parameters
    ----------
    data_dir : str
        Directory containing the two input CSV files.
    output_path : str
        Path of the predictions CSV that is written.
    """
    # Set seed for reproducibility
    np.random.seed(0)

    print("Loading data...")
    # Build the paths with os.path.join: the previous hard-coded backslash
    # separator only resolved on Windows.
    training_data = pd.read_csv(
        os.path.join(data_dir, 'numerai_training_data.csv'), header=0)
    prediction_data = pd.read_csv(
        os.path.join(data_dir, 'numerai_tournament_data.csv'), header=0)

    # Select the feature columns by name; the same list is applied to both
    # files so the model is scored on the columns it was trained on.
    features = [f for f in list(training_data) if "feature" in f]
    X = training_data[features]
    Y = training_data["target"]
    x_prediction = prediction_data[features]
    ids = prediction_data["id"]

    # This is your model that will learn to predict.
    # C is expressed as a power of ten so it can be tuned on a log scale.
    c = 1
    lr = LogisticRegression(C=10. ** c, random_state=1)
    print("Training...")
    # Your model is trained on the training_data
    lr.fit(X, Y)

    print("Predicting...")
    # predict_proba returns one column per class; column 1 is kept as the
    # probability that the target is 1 (assumes a binary 0/1 target).
    y_prediction = lr.predict_proba(x_prediction)
    results = y_prediction[:, 1]
    results_df = pd.DataFrame(data={'probability': results})
    # Both frames carry the default positional index, so the join pairs each
    # id with the prediction made for the same row.
    joined = pd.DataFrame(ids).join(results_df)

    print("Writing predictions to predictions.csv")
    # Save the predictions out to a CSV file
    joined.to_csv(output_path, index=False)
    # Now you can upload these predictions on numer.ai


if __name__ == '__main__':
    main()

Loading data...
Training...
Predicting...
Writing predictions to predictions.csv


In [4]:
# Load the training and tournament CSVs once for the experiment cells below.
# os.path.join keeps the paths portable; the previous hard-coded backslash
# separator only resolved on Windows.
DATA_DIR = 'numerai_datasets'
training_data = pd.read_csv(os.path.join(DATA_DIR, 'numerai_training_data.csv'), header=0)
test_data = pd.read_csv(os.path.join(DATA_DIR, 'numerai_tournament_data.csv'), header=0)

In [9]:
# Experiment: standardize the features, reduce them with PCA, then fit a
# Gaussian naive Bayes classifier and write its tournament predictions.
features = [f for f in list(training_data) if "feature" in f]
X_train = training_data[features]
Y_train = training_data["target"]

# The predictions are written next to the tournament ids, so the rows that are
# scored must come from test_data (this previously re-used training_data).
X_test = test_data[features]
ids = test_data["id"]

# Fit the scaler on the training set only, then apply the same transform to
# both the training set and the test set.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# A float n_components tells PCA to keep enough components to explain that
# fraction of the variance, and it must lie strictly between 0 and 1.
# The original value of 0.0 is not a usable fraction; 0.95 is a placeholder.
# TODO: tune this value.
PCA_VARIANCE = 0.95
pca = PCA(PCA_VARIANCE)
pca.fit(X_train_std)
X_train_pca = pca.transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

#Create a Gaussian Classifier
model = GaussianNB()

print("Training...")
# Train on the standardized, PCA-reduced features so the preprocessing above
# actually reaches the model. The old extra `lr.fit(...)` call was dropped:
# `lr` only exists inside main(), and its fit was never used for predictions.
model.fit(X_train_pca, Y_train)

print("Predicting...")
# predict_proba returns one column per class; column 1 is kept as the
# probability that the target is 1 (assumes a binary 0/1 target).
# The test rows go through the same scaler + PCA transform as the training rows.
y_prediction = model.predict_proba(X_test_pca)
results = y_prediction[:, 1]
results_df = pd.DataFrame(data={'probability': results})
# ids and results_df both carry the default positional index of test_data.
joined = pd.DataFrame(ids).join(results_df)

print("Writing predictions to predictions.csv")
# Save the predictions out to a CSV file
joined.to_csv("predictions.csv", index=False)
# Now you can upload these predictions on numer.ai

Training...
Predicting...
Writing predictions to predictions.csv


In [None]:
# Experiment: Gaussian naive Bayes on standardized features reduced by PCA to
# the components explaining 70% of the variance. The cell is self-contained
# given `training_data` / `test_data`; it no longer relies on arrays left over
# from an earlier cell.
features = [f for f in list(training_data) if "feature" in f]
X_train = training_data[features]
Y_train = training_data["target"]

# The predictions are written next to the tournament ids, so the rows that are
# scored must come from test_data (this previously re-used training_data).
X_test = test_data[features]
ids = test_data["id"]

# Fit the scaler on the training set only and apply it to both sets.
# Previously the scaler was created but never fitted or used.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# Fit PCA on the standardized training features and apply that same projection
# to both sets. Previously it was fitted on stale standardized data and then
# applied to the unscaled training frame.
pca = PCA(0.7)
pca.fit(X_train_std)
X_train_pca = pca.transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

#Create a Gaussian Classifier
model = GaussianNB()

print("Training...")
# Train the model on the PCA-reduced training set. The old extra
# `lr.fit(...)` call was dropped: `lr` only exists inside main(), and its fit
# was never used for predictions.
model.fit(X_train_pca, Y_train)

print("Predicting...")
# predict_proba returns one column per class; column 1 is kept as the
# probability that the target is 1 (assumes a binary 0/1 target).
y_prediction = model.predict_proba(X_test_pca)
results = y_prediction[:, 1]
results_df = pd.DataFrame(data={'probability': results})
# ids and results_df both carry the default positional index of test_data.
joined = pd.DataFrame(ids).join(results_df)

print("Writing predictions to predictions.csv")
# Save the predictions out to a CSV file
joined.to_csv("predictions.csv", index=False)
# Now you can upload these predictions on numer.ai