In [22]:
import pandas as pd
import math
import numpy as np
import torch
from sklearn.model_selection import train_test_split

In [23]:
df = pd.read_excel("./data.xlsx")
df.columns

Index(['id', 'pair', 'coin', 'coin_address', 'buying_price', 'selling_price',
       'buying_coin_age', 'selling_coin_age', 'buying_transactions_buys_m5',
       'buying_transactions_sells_m5', 'buying_transactions_buys_h1',
       'buying_transactions_sells_h1', 'buying_total_transfers',
       'buying_total_transactions', 'selling_transactions_buys_m5',
       'selling_transactions_sells_m5', 'selling_transactions_buys_h1',
       'selling_transactions_sells_h1', 'buying_volume_m5',
       'selling_volume_m5', 'buying_volume_h1', 'selling_volume_h1',
       'buying_price_change_m5', 'buying_price_change_h1',
       'selling_price_change_m5', 'selling_price_change_h1',
       'buying_liquidity', 'selling_liquidity', 'buying_fdv', 'selling_fdv',
       'buying_market_cap', 'selling_market_cap', 'is_telegram', 'is_twitter',
       'is_website', 'opening_date', 'closing_date', 'PNL', 'status', 'type'],
      dtype='object')

In [24]:
drop_columns = [
    "id",
    "pair",
    "coin",
    "coin_address", 
    "selling_price", 
    "selling_coin_age", 
    "buying_transactions_buys_h1", 
    "buying_transactions_sells_h1", 
    "selling_transactions_buys_m5", 
    "selling_transactions_sells_m5", 
    "selling_transactions_buys_h1", 
    "selling_transactions_sells_h1", 
    "selling_volume_m5", 
    "buying_volume_h1", 
    "selling_volume_h1", 
    "buying_price_change_h1",
    "selling_price_change_m5", 
    "selling_price_change_h1", 
    "selling_liquidity", 
    "selling_fdv", 
    "selling_market_cap", 
    "opening_date", 
    "closing_date", 
    "status", 
    "type"]
df = df.drop(drop_columns, axis = 1)
df.head()

Unnamed: 0,buying_price,buying_coin_age,buying_transactions_buys_m5,buying_transactions_sells_m5,buying_total_transfers,buying_total_transactions,buying_volume_m5,buying_price_change_m5,buying_liquidity,buying_fdv,buying_market_cap,is_telegram,is_twitter,is_website,PNL
0,2017,2997760866666667,210,22,245.0,420,3402,109,272823,201780,201780,1,0,1,-9738
1,1977,2546277566666667,171,17,146.0,342,297453,77,271378,197730,197730,0,0,0,-9664
2,2215,29628692666666665,178,25,279.0,356,17154,99,28653,221600,221600,0,0,0,6971
3,5371,22864479666666666,112,29,1.0,224,15360,1507,714449,537160,537160,1,1,1,-3115
4,7639,11391634333333334,65,14,409.0,130,4787,24,2798521,763910,763910,0,0,0,7476


In [25]:
renamed_columns = {
    "buying_price": "price", 
    "buying_coin_age": "coin_age", 
    "buying_transactions_buys_m5": "buys_m5",
    "buying_transactions_sells_m5": "sells_m5", 
    "buying_total_transfers": "total_transfers",
    "buying_total_transactions": "total_transactions", 
    "buying_volume_m5": "volume_m5",
    "buying_price_change_m5": "price_change_m5",
    "buying_liquidity": "liquidity", 
    "buying_fdv": "fdv",
    "buying_market_cap": "market_cap"
}
df.rename(columns=renamed_columns, inplace=True) 
df

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,PNL
0,000002017,2997760866666667,210,22,245.0,420,3402,109,272823,201780,201780,1,0,1,-9738
1,000001977,2546277566666667,171,17,146.0,342,297453,77,271378,197730,197730,0,0,0,-9664
2,000002215,29628692666666665,178,25,279.0,356,17154,99,28653,221600,221600,0,0,0,6971
3,000005371,22864479666666666,112,29,1.0,224,15360,1507,714449,537160,537160,1,1,1,-3115
4,000007639,11391634333333334,65,14,409.0,130,4787,24,2798521,763910,763910,0,0,0,7476
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,00000822,11618589166666666,33,9,3365.0,66,14014,37,2835447,822010,822010,1,0,0,7847
291,000001556,28510967166666665,108,42,143.0,216,24690,1918,675815,155670,155670,0,1,0,7404
292,00001193,20249139166666668,127,56,4914.0,254,29917,96,3439514,1193180,1193180,0,0,0,6471
293,000005807,20921795333333333,76,51,787.0,152,13556,-4,2401445,580720,580720,1,1,0,6370


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 295 entries, 0 to 294
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   price               295 non-null    object 
 1   coin_age            295 non-null    object 
 2   buys_m5             295 non-null    int64  
 3   sells_m5            295 non-null    int64  
 4   total_transfers     291 non-null    float64
 5   total_transactions  295 non-null    int64  
 6   volume_m5           295 non-null    int64  
 7   price_change_m5     295 non-null    int64  
 8   liquidity           295 non-null    object 
 9   fdv                 295 non-null    object 
 10  market_cap          295 non-null    object 
 11  is_telegram         295 non-null    int64  
 12  is_twitter          295 non-null    int64  
 13  is_website          295 non-null    int64  
 14  PNL                 295 non-null    object 
dtypes: float64(1), int64(8), object(6)
memory usage: 34.7+ KB

In [27]:
df = df.dropna(subset=['total_transfers'])


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 291 entries, 0 to 294
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   price               291 non-null    object 
 1   coin_age            291 non-null    object 
 2   buys_m5             291 non-null    int64  
 3   sells_m5            291 non-null    int64  
 4   total_transfers     291 non-null    float64
 5   total_transactions  291 non-null    int64  
 6   volume_m5           291 non-null    int64  
 7   price_change_m5     291 non-null    int64  
 8   liquidity           291 non-null    object 
 9   fdv                 291 non-null    object 
 10  market_cap          291 non-null    object 
 11  is_telegram         291 non-null    int64  
 12  is_twitter          291 non-null    int64  
 13  is_website          291 non-null    int64  
 14  PNL                 291 non-null    object 
dtypes: float64(1), int64(8), object(6)
memory usage: 36.4+ KB


In [29]:
df["price"] = df["price"].replace(",",".", regex=True).astype(float)
df["coin_age"] = df["coin_age"].replace(",",".", regex=True).astype(float)
df["liquidity"] = df["liquidity"].replace(",",".", regex=True).astype(float)
df["fdv"] = df["fdv"].replace(",",".", regex=True).astype(float)
df["market_cap"] = df["market_cap"].replace(",",".", regex=True).astype(float)
df["PNL"] = df["PNL"].replace(",",".", regex=True).astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["price"] = df["price"].replace(",",".", regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["coin_age"] = df["coin_age"].replace(",",".", regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["liquidity"] = df["liquidity"].replace(",",".", regex=True

In [30]:
df.loc[df["PNL"] >= 60, "profit"] = 1
df.loc[df["PNL"] < 60, "profit"] = 0
df = df.drop(["PNL"], axis = 1)
df["profit"] = df["profit"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[df["PNL"] >= 60, "profit"] = 1


In [31]:
df = df[(df["coin_age"] < 6) & (df["liquidity"] >= 1000)]

In [32]:
df.head()

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,profit
0,2e-05,2.997761,210,22,245.0,420,3402,109,2728.23,20178.0,20178.0,1,0,1,0
1,2e-05,2.546278,171,17,146.0,342,297453,77,2713.78,19773.0,19773.0,0,0,0,0
2,2.2e-05,2.962869,178,25,279.0,356,17154,99,2865.3,22160.0,22160.0,0,0,0,1
3,5.4e-05,2.286448,112,29,1.0,224,15360,1507,7144.49,53716.0,53716.0,1,1,1,0
4,7.6e-05,1.139163,65,14,409.0,130,4787,24,27985.21,76391.0,76391.0,0,0,0,1


In [33]:
df.describe()

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,profit
count,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0,284.0
mean,0.0638892,2.467837,113.612676,41.126761,6028.591549,228.260563,53975.602113,137905.0,60948.200775,129175300.0,129175300.0,0.116197,0.109155,0.091549,0.443662
std,0.09864698,0.971388,89.743321,57.799074,55537.999196,181.250863,61213.113944,397406.1,73124.613346,480894100.0,480894100.0,0.321027,0.312384,0.288898,0.497693
min,5.676e-08,0.810816,0.0,0.0,0.0,0.0,3.0,-89.0,1087.6,2565.0,2565.0,0.0,0.0,0.0,0.0
25%,7.35975e-05,1.742977,57.0,12.0,2.0,114.0,15183.5,43.75,25445.66,68650.75,68650.75,0.0,0.0,0.0,0.0
50%,0.017325,2.496501,97.0,25.0,233.5,194.0,30497.5,27919.0,37698.595,16828760.0,16828760.0,0.0,0.0,0.0,0.0
75%,0.0930125,3.064882,156.75,52.25,436.25,313.5,74643.0,135199.5,82371.33,95533060.0,95533060.0,0.0,0.0,0.0,1.0
max,0.4358,5.315377,688.0,471.0,818285.0,1376.0,350169.0,4083819.0,509352.43,3830535000.0,3830535000.0,1.0,1.0,1.0,1.0


In [34]:
x = df.drop(columns=["profit"])
y = df["profit"].copy()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [35]:
x_train = torch.tensor(x_train.values).to(torch.float32)
x_test = torch.tensor(x_test.values).to(torch.float32)
y_train = torch.tensor(y_train.values).to(torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test.values).to(torch.float32).unsqueeze(1)

In [36]:
class LogisticRegression(torch.nn.Module):

    def __init__(self, n_features):
        super(LogisticRegression, self).__init__()
        self.lr = torch.nn.Linear(n_features, 1)
        
    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out

In [37]:
n_features = x_train.shape[1]
model = LogisticRegression(n_features)

optim = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss()

In [38]:
EPOCHS = 3

def train(model, optim, criterion, x, y, epochs=EPOCHS):
    for e in range(1, epochs + 1):
        optim.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optim.step()
        print(f'Среднее значение функции потерь за эпоху {e}: {loss.data}')
    return model

In [39]:
model = train(model, optim, criterion, x_train, y_train)

Среднее значение функции потерь за эпоху 1: 55.50660705566406
Среднее значение функции потерь за эпоху 2: 55.50660705566406
Среднее значение функции потерь за эпоху 3: 55.50660705566406


In [40]:
def accuracy(model, x, y):
    out = model(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

plain_accuracy = accuracy(model, x_test, y_test)
print(f'Точность модели: {plain_accuracy}')

Точность модели: 0.4385964870452881


In [41]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

x = df.drop(columns=["profit"])
y = df["profit"].copy()

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Создание и обучение модели логистической регрессии
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Предсказание классов на тестовой выборке
y_pred = model.predict(X_test)

# Вычисление точности классификации
accuracy = accuracy_score(y_test, y_pred)
print("Точность классификации: {:.2f}%".format(accuracy * 100))

Точность классификации: 57.89%
