In [15]:
import pandas as pd
import math
import numpy as np
import torch
from sklearn.model_selection import train_test_split

In [16]:
df = pd.read_excel("./data.xlsx")
df.columns

Index(['id', 'pair', 'coin', 'coin_address', 'buying_price', 'selling_price',
       'buying_coin_age', 'selling_coin_age', 'buying_transactions_buys_m5',
       'buying_transactions_sells_m5', 'buying_transactions_buys_h1',
       'buying_transactions_sells_h1', 'buying_total_transfers',
       'buying_total_transactions', 'selling_transactions_buys_m5',
       'selling_transactions_sells_m5', 'selling_transactions_buys_h1',
       'selling_transactions_sells_h1', 'buying_volume_m5',
       'selling_volume_m5', 'buying_volume_h1', 'selling_volume_h1',
       'buying_price_change_m5', 'buying_price_change_h1',
       'selling_price_change_m5', 'selling_price_change_h1',
       'buying_liquidity', 'selling_liquidity', 'buying_fdv', 'selling_fdv',
       'buying_market_cap', 'selling_market_cap', 'is_telegram', 'is_twitter',
       'is_website', 'opening_date', 'closing_date', 'PNL', 'status', 'type'],
      dtype='object')

In [17]:
drop_columns = [
    "id",
    "pair",
    "token",
    "token_address", 
    "selling_price", 
    "selling_token_age", 
    "buying_transactions_buys_h1", 
    "buying_transactions_sells_h1", 
    "selling_transactions_buys_m5", 
    "selling_transactions_sells_m5", 
    "selling_transactions_buys_h1", 
    "selling_transactions_sells_h1", 
    "selling_volume_m5", 
    "buying_volume_h1", 
    "selling_volume_h1", 
    "buying_price_change_h1",
    "selling_price_change_m5", 
    "selling_price_change_h1", 
    "selling_liquidity", 
    "selling_fdv", 
    "selling_market_cap", 
    "opening_date", 
    "closing_date", 
    "status", 
    "type"]
df = df.drop(drop_columns, axis = 1)
df.head()

Unnamed: 0,buying_price,buying_coin_age,buying_transactions_buys_m5,buying_transactions_sells_m5,buying_total_transfers,buying_total_transactions,buying_volume_m5,buying_price_change_m5,buying_liquidity,buying_fdv,buying_market_cap,is_telegram,is_twitter,is_website,PNL
0,1499,22842054333333333,124,31,259.0,248,83905,211212,9555895,1499018960,1499018960,0,0,0,
1,6543,2586920966666667,46,7,176.0,92,294197,3977875,58483779,58167804560,58167804560,0,0,0,
2,6659,38775534833333336,71,8,1.0,142,295644,4027279,59036825,59192301730,59192301730,0,0,0,-9767.0
3,6535,13659095833333335,22,9,607.0,44,2334,12,2515871,653560,653560,0,0,0,-3209.0
4,3608,33994868166666667,51,12,301.0,102,12183,8479,1872223,36086630,36086630,0,0,0,6231.0


In [18]:
renamed_columns = {
    "buying_price": "price", 
    "buying_token_age": "token_age", 
    "buying_transactions_buys_m5": "buys_m5",
    "buying_transactions_sells_m5": "sells_m5", 
    "buying_total_transfers": "total_transfers",
    "buying_total_transactions": "total_transactions", 
    "buying_volume_m5": "volume_m5",
    "buying_price_change_m5": "price_change_m5",
    "buying_liquidity": "liquidity", 
    "buying_fdv": "fdv",
    "buying_market_cap": "market_cap"
}
df.rename(columns=renamed_columns, inplace=True) 
df

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,PNL
0,01499,22842054333333333,124,31,259.0,248,83905,211212,9555895,1499018960,1499018960,0,0,0,
1,00006543,2586920966666667,46,7,176.0,92,294197,3977875,58483779,58167804560,58167804560,0,0,0,
2,00006659,38775534833333336,71,8,1.0,142,295644,4027279,59036825,59192301730,59192301730,0,0,0,-9767
3,000006535,13659095833333335,22,9,607.0,44,2334,12,2515871,653560,653560,0,0,0,-3209
4,0003608,33994868166666667,51,12,301.0,102,12183,8479,1872223,36086630,36086630,0,0,0,6231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,00000822,11618589166666666,33,9,3365.0,66,14014,37,2835447,822010,822010,1,0,0,7847
358,000001556,28510967166666665,108,42,143.0,216,24690,1918,675815,155670,155670,0,1,0,7404
359,00001193,20249139166666668,127,56,4914.0,254,29917,96,3439514,1193180,1193180,0,0,0,6471
360,000005807,20921795333333333,76,51,787.0,152,13556,-4,2401445,580720,580720,1,1,0,6370


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 362 entries, 0 to 361
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   price               362 non-null    object 
 1   coin_age            362 non-null    object 
 2   buys_m5             362 non-null    int64  
 3   sells_m5            362 non-null    int64  
 4   total_transfers     358 non-null    float64
 5   total_transactions  362 non-null    int64  
 6   volume_m5           362 non-null    int64  
 7   price_change_m5     362 non-null    int64  
 8   liquidity           362 non-null    object 
 9   fdv                 362 non-null    object 
 10  market_cap          362 non-null    object 
 11  is_telegram         362 non-null    int64  
 12  is_twitter          362 non-null    int64  
 13  is_website          362 non-null    int64  
 14  PNL                 360 non-null    object 
dtypes: float64(1), int64(8), object(6)
memory usage: 42.6+ KB

In [20]:
df = df.dropna(subset=['total_transfers', 'PNL'])


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 356 entries, 2 to 361
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   price               356 non-null    object 
 1   coin_age            356 non-null    object 
 2   buys_m5             356 non-null    int64  
 3   sells_m5            356 non-null    int64  
 4   total_transfers     356 non-null    float64
 5   total_transactions  356 non-null    int64  
 6   volume_m5           356 non-null    int64  
 7   price_change_m5     356 non-null    int64  
 8   liquidity           356 non-null    object 
 9   fdv                 356 non-null    object 
 10  market_cap          356 non-null    object 
 11  is_telegram         356 non-null    int64  
 12  is_twitter          356 non-null    int64  
 13  is_website          356 non-null    int64  
 14  PNL                 356 non-null    object 
dtypes: float64(1), int64(8), object(6)
memory usage: 44.5+ KB


In [22]:
df["price"] = df["price"].replace(",",".", regex=True).astype(float)
df["token_age"] = df["token_age"].replace(",",".", regex=True).astype(float)
df["liquidity"] = df["liquidity"].replace(",",".", regex=True).astype(float)
df["fdv"] = df["fdv"].replace(",",".", regex=True).astype(float)
df["market_cap"] = df["market_cap"].replace(",",".", regex=True).astype(float)
df["PNL"] = df["PNL"].replace(",",".", regex=True).astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["price"] = df["price"].replace(",",".", regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["coin_age"] = df["coin_age"].replace(",",".", regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["liquidity"] = df["liquidity"].replace(",",".", regex=True

In [23]:
df.loc[df["PNL"] >= 60, "profit"] = 1
df.loc[df["PNL"] < 60, "profit"] = 0
df = df.drop(["PNL"], axis = 1)
df["profit"] = df["profit"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[df["PNL"] >= 60, "profit"] = 1


In [24]:
df = df[(df["token_age"] < 6) & (df["liquidity"] >= 1000)]

In [25]:
df.head()

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,profit
2,0.000666,3.877553,71,8,1.0,142,295644,4027279,590368.25,5919230000.0,5919230000.0,0,0,0,0
3,6.5e-05,1.36591,22,9,607.0,44,2334,12,25158.71,65356.0,65356.0,0,0,0,0
4,0.003608,3.399487,51,12,301.0,102,12183,8479,18722.23,3608663.0,3608663.0,0,0,0,1
5,0.3923,3.028979,191,35,448.0,382,109828,545781,155636.34,392331300.0,392331300.0,0,0,0,0
6,0.04135,1.979397,50,20,2.0,100,32495,57442,50437.82,41350270.0,41350270.0,0,0,0,0


In [26]:
df.describe()

Unnamed: 0,price,coin_age,buys_m5,sells_m5,total_transfers,total_transactions,volume_m5,price_change_m5,liquidity,fdv,market_cap,is_telegram,is_twitter,is_website,profit
count,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0
mean,0.06425184,2.539831,114.326648,42.237822,4975.595989,230.045845,57280.951289,182936.0,68218.378911,193617000.0,193617000.0,0.097421,0.097421,0.083095,0.43553
std,0.09808716,0.986027,88.161339,58.127467,50132.917914,179.484825,65491.703126,531830.3,89690.65294,717526500.0,717526500.0,0.296956,0.296956,0.276421,0.496538
min,5.676e-08,0.810816,0.0,0.0,0.0,0.0,3.0,-89.0,1087.6,2565.0,2565.0,0.0,0.0,0.0,0.0
25%,7.654e-05,1.796398,56.0,12.0,2.0,112.0,16797.0,60.0,25636.03,69920.0,69920.0,0.0,0.0,0.0,0.0
50%,0.02131,2.570056,97.0,27.0,212.0,194.0,31790.0,35432.0,43273.53,21953410.0,21953410.0,0.0,0.0,0.0,0.0
75%,0.08856,3.20396,155.0,53.0,422.0,310.0,77024.0,143527.0,83235.21,101793700.0,101793700.0,0.0,0.0,0.0,1.0
max,0.4358,5.315377,688.0,471.0,818285.0,1376.0,351245.0,4107972.0,596931.37,6043169000.0,6043169000.0,1.0,1.0,1.0,1.0


In [27]:
x = df.drop(columns=["profit"])
y = df["profit"].copy()

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [28]:
x_train = torch.tensor(x_train.values).to(torch.float32)
x_test = torch.tensor(x_test.values).to(torch.float32)
y_train = torch.tensor(y_train.values).to(torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test.values).to(torch.float32).unsqueeze(1)

In [29]:
class LogisticRegression(torch.nn.Module):

    def __init__(self, n_features):
        super(LogisticRegression, self).__init__()
        self.lr = torch.nn.Linear(n_features, 1)
        
    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out

In [30]:
n_features = x_train.shape[1]
model = LogisticRegression(n_features)

optim = torch.optim.SGD(model.parameters(), lr=1)
criterion = torch.nn.BCELoss()

In [31]:
EPOCHS = 3

def train(model, optim, criterion, x, y, epochs=EPOCHS):
    for e in range(1, epochs + 1):
        optim.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optim.step()
        print(f'Среднее значение функции потерь за эпоху {e}: {loss.data}')
    return model

In [32]:
model = train(model, optim, criterion, x_train, y_train)

Среднее значение функции потерь за эпоху 1: 44.44444274902344
Среднее значение функции потерь за эпоху 2: 44.44444274902344
Среднее значение функции потерь за эпоху 3: 44.44444274902344


In [33]:
def accuracy(model, x, y):
    out = model(x)
    correct = torch.abs(y - out) < 0.5
    return correct.float().mean()

plain_accuracy = accuracy(model, x_test, y_test)
print(f'Точность модели: {plain_accuracy}')

Точность модели: 0.5714285969734192


In [34]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

x = df.drop(columns=["profit"])
y = df["profit"].copy()

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Создание и обучение модели логистической регрессии
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Предсказание классов на тестовой выборке
y_pred = model.predict(X_test)

# Вычисление точности классификации
accuracy = accuracy_score(y_test, y_pred)
print("Точность классификации: {:.2f}%".format(accuracy * 100))

Точность классификации: 52.86%
