In [None]:
import json
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Initialize the sentiment model and its tokenizer.
# NOTE(review): confirm that 'FinMem/finmem-base' is a published checkpoint id;
# the later cells of this notebook load "ProsusAI/finbert" instead.
tokenizer = AutoTokenizer.from_pretrained('FinMem/finmem-base')
model = AutoModelForSequenceClassification.from_pretrained('FinMem/finmem-base')

# Load historical price data from JSON for BTC and ETH
btc_price_data = pd.read_json('/btc_train.json')
eth_price_data = pd.read_json('/eth_train.json')

# Load news data from JSON for BTC and ETH.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default text encoding (JSON text is UTF-8).
with open('BTC_news.json', 'r', encoding='utf-8') as f:
    btc_news_data = json.load(f)
with open('ETH_news.json', 'r', encoding='utf-8') as f:
    eth_news_data = json.load(f)

# Convert news data into DataFrames
btc_news_df = pd.DataFrame(btc_news_data)
eth_news_df = pd.DataFrame(eth_news_data)

# Function to analyze sentiment using the module-level tokenizer and model
def analyze_sentiment(text):
    """Score one text and return the model's class probabilities.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        text: The string to classify.

    Returns:
        list[float]: Softmax probabilities for ``text``, one per model class.
        Callers in this notebook read index 0 as the negative score and
        index 1 as the positive score.
        NOTE(review): confirm that ordering against ``model.config.id2label``.
    """
    # Truncate to 512 tokens (the same limit the later cells use) so a long
    # article cannot exceed the encoder's maximum input length.
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
    sentiment_scores = torch.softmax(outputs.logits, dim=1).tolist()[0]
    return sentiment_scores  # read by callers as [negative_score, positive_score]

def compute_rsi(prices, window=14):
    """Relative Strength Index of a price series.

    This helper was called by the feature-engineering code below but was not
    defined or imported anywhere in the visible notebook. It uses simple
    rolling means of gains and losses, matching the simple rolling mean used
    for the 'MA' feature.

    Args:
        prices: pandas Series of prices in chronological order.
        window: Number of periods in the rolling averages.

    Returns:
        pandas Series aligned with ``prices``. The leading rows are NaN until
        a full window of price differences is available; the value is 100
        when the window contains gains but no losses.
    """
    delta = prices.diff()
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=window).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)


def _add_sentiment_columns(news_df):
    """Score ``news_df['content']`` and add the derived sentiment columns in place."""
    news_df['sentiment_scores'] = news_df['content'].apply(analyze_sentiment)
    news_df['positive_score'] = news_df['sentiment_scores'].apply(lambda x: x[1])
    news_df['negative_score'] = news_df['sentiment_scores'].apply(lambda x: x[0])
    news_df['sentiment_indicator'] = news_df['positive_score'] - news_df['negative_score']


def _build_model_inputs(price_data, news_df):
    """Merge prices with scored news and return (merged, features, labels)."""
    merged = pd.merge(price_data, news_df, left_on='date', right_on='published_at', how='inner')
    # NOTE(review): the first rows of 'MA' and 'RSI' are NaN (warm-up of the
    # 14-period window); confirm the downstream estimator accepts NaN inputs.
    merged['MA'] = merged['Close'].rolling(window=14).mean()
    merged['RSI'] = compute_rsi(merged['Close'], window=14)
    return merged, merged[['MA', 'RSI', 'sentiment_indicator']], merged['Target']


# Apply sentiment analysis to BTC and ETH news data
_add_sentiment_columns(btc_news_df)
_add_sentiment_columns(eth_news_df)

# Merge price data with news sentiment and engineer features for BTC
btc_data, btc_features, btc_labels = _build_model_inputs(btc_price_data, btc_news_df)

# Merge price data with news sentiment and engineer features for ETH
eth_data, eth_features, eth_labels = _build_model_inputs(eth_price_data, eth_news_df)

# Function to train and evaluate a classifier on the engineered features
def train_and_evaluate(features, labels, crypto_name):
    """Fit a random forest on a 70/30 split and print its classification report.

    Args:
        features: Feature table passed to ``train_test_split``.
        labels: Target values aligned with ``features``.
        crypto_name: Label used in the printed report header.

    Returns:
        None. The report is printed.
    """
    # Imported locally, as RandomForestClassifier already was: the notebook's
    # visible import lines never bring these scikit-learn names into scope,
    # so the function previously failed with NameError.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # Data Splitting
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)
    # Data Normalization (fit on the training split only, then applied to the test split)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Model Training using Random Forest
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)
    # Prediction
    y_pred = rf_model.predict(X_test_scaled)
    # Evaluation
    print(f"{crypto_name} Model Performance with FinMem:")
    print(classification_report(y_test, y_pred))

# Fit and report one model per asset: BTC first, then ETH
train_and_evaluate(features=btc_features, labels=btc_labels, crypto_name='BTC')
train_and_evaluate(features=eth_features, labels=eth_labels, crypto_name='ETH')


In [3]:
import json
from datetime import datetime
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")

# Locations of the BTC and ETH training datasets (JSON files)
BTC_DATA_PATH, ETH_DATA_PATH = (
    'D:/AIT526/Project/btc_train.json',
    'D:/AIT526/Project/eth_train.json',
)

def load_json_data(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's default
    # text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def preprocess_data(data):
    """Turn the raw loaded mapping into a date-indexed DataFrame.

    The mapping's top-level keys become the rows (parsed with
    ``pd.to_datetime``) and the 'prices' column is converted to float.

    Args:
        data: Mapping passed to ``pd.DataFrame`` and then transposed.
            Presumably date string -> record dict; verify against the files.

    Returns:
        pandas DataFrame with a datetime index and a float 'prices' column.
    """
    by_date = pd.DataFrame(data).transpose()
    by_date = by_date.set_axis(pd.to_datetime(by_date.index), axis=0)
    return by_date.assign(prices=by_date['prices'].astype(float))

def calculate_price_changes(df):
    """Return the row-over-row fractional change of ``df['prices']``.

    Missing results (such as the first row, which has no predecessor) are
    reported as 0.
    """
    fractional_change = df['prices'].pct_change()
    return fractional_change.fillna(0)

class FINMEMAgent:
    """Sentiment-driven trading agent with a bounded memory of recent records.

    Attributes:
        nlp_model: Sequence-classification model used to score news text.
        tokenizer: Tokenizer applied to the news before it reaches the model.
        memory: List of the most recent records given to ``update_memory``
            (at most 100 entries).
        character: Always "adaptive"; no method of this class reads it.
        positive_index: Position of the "positive" class in a sentiment vector.
        negative_index: Position of the "negative" class in a sentiment vector.
    """

    # Ordering the class originally hard-coded; used whenever the model does
    # not publish a usable label map.
    positive_index = 1
    negative_index = 0

    def __init__(self, nlp_model, tokenizer):
        self.nlp_model = nlp_model
        self.tokenizer = tokenizer
        self.memory = []
        self.character = "adaptive"
        # Read the class order from the model instead of assuming it: models
        # differ in which output position means "positive".
        self.positive_index, self.negative_index = self._resolve_label_indices(nlp_model)

    @staticmethod
    def _resolve_label_indices(nlp_model):
        """Return (positive_index, negative_index) for ``nlp_model``.

        The indices come from ``nlp_model.config.id2label`` (label names are
        matched case-insensitively). If the model has no such mapping, or it
        lacks a "positive" or "negative" entry, the legacy ordering (1, 0)
        is returned.
        """
        config = getattr(nlp_model, "config", None)
        id2label = getattr(config, "id2label", None)
        if not isinstance(id2label, dict):
            return 1, 0
        by_name = {str(name).lower(): int(idx) for idx, name in id2label.items()}
        positive, negative = by_name.get("positive"), by_name.get("negative")
        if positive is None or negative is None:
            return 1, 0
        return positive, negative

    def process_news_batch(self, news_batch):
        """Score a batch of texts and return softmax probabilities as a NumPy array.

        Args:
            news_batch: Texts handed to the tokenizer in one call; they are
                padded together and truncated to 512 tokens.

        Returns:
            numpy.ndarray of probabilities, softmax taken over the last axis
            of the model's logits (presumably one row per text).
        """
        inputs = self.tokenizer(news_batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        # Inference only, so gradients are disabled.
        with torch.no_grad():
            outputs = self.nlp_model(**inputs)
        sentiments = torch.nn.functional.softmax(outputs.logits, dim=-1)
        return sentiments.numpy()

    def update_memory(self, data):
        """Append ``data`` to the memory, discarding the oldest entry beyond 100."""
        self.memory.append(data)
        if len(self.memory) > 100:
            self.memory.pop(0)

    def make_decision(self, sentiment, btc_price_change, eth_price_change):
        """Return "BUY", "SELL" or "HOLD" from sentiment and price moves.

        Args:
            sentiment: Sequence of class probabilities, indexed with
                ``positive_index`` / ``negative_index``.
            btc_price_change: Fractional BTC price change for the period.
            eth_price_change: Fractional ETH price change for the period.

        Returns:
            "BUY" when the positive probability exceeds 0.6 and either asset
            rose by more than 1%; "SELL" when the negative probability exceeds
            0.6 and either asset fell by more than 1%; otherwise "HOLD".
        """
        positive = sentiment[self.positive_index]
        negative = sentiment[self.negative_index]
        if positive > 0.6 and (btc_price_change > 0.01 or eth_price_change > 0.01):
            return "BUY"
        elif negative > 0.6 and (btc_price_change < -0.01 or eth_price_change < -0.01):
            return "SELL"
        return "HOLD"

def process_news(agent, news_list):
    """Score every item of ``news_list`` through ``agent`` in batches of 32.

    Args:
        agent: Object exposing ``process_news_batch(list_of_texts)``.
        news_list: Sequence of texts to score.

    Returns:
        numpy.ndarray built from the per-item results of all batches, in the
        order of ``news_list`` (an empty array when there is no news).
    """
    batch_size = 32
    batch_starts = range(0, len(news_list), batch_size)
    scored = []
    for start in tqdm(batch_starts, desc="Processing news"):
        batch_scores = agent.process_news_batch(news_list[start:start + batch_size])
        scored += list(batch_scores)
    return np.array(scored)

def calculate_metrics(decisions_df, periods_per_year=252):
    """Compute the annualised Sharpe ratio and maximum drawdown of a portfolio.

    Args:
        decisions_df: DataFrame with a 'portfolio_value' column, one row per
            period in chronological order.
        periods_per_year: Number of periods used to annualise the Sharpe
            ratio. Defaults to 252, the value that was previously hard-coded;
            pass a different count when the rows are not business days.

    Returns:
        tuple: ``(sharpe_ratio, max_drawdown)``. The Sharpe ratio uses a zero
        risk-free rate and is NaN when the period returns have no spread
        (constant portfolio value or fewer than three rows). The drawdown is
        the most negative value of ``value / running_peak - 1`` (0 when the
        portfolio never falls below a previous peak).
    """
    portfolio_values = decisions_df['portfolio_value']
    returns = portfolio_values.pct_change()
    sharpe_ratio = np.sqrt(periods_per_year) * returns.mean() / returns.std()
    max_drawdown = (portfolio_values / portfolio_values.cummax() - 1).min()
    return sharpe_ratio, max_drawdown

def main():
    """Run the BTC/ETH sentiment-driven trading simulation end to end.

    Steps, in order: load and preprocess both datasets, join them on date,
    compute per-asset price changes, score all news with the FinBERT
    checkpoint, replay the dates to make BUY/SELL/HOLD decisions against a
    10,000-cash starting portfolio, then print summary metrics and plot the
    portfolio value.

    Returns:
        None. Results are printed and plotted. Returns early (after a message)
        if no date produced a decision.
    """
    print("Loading data...")
    btc_data = load_json_data(BTC_DATA_PATH)
    eth_data = load_json_data(ETH_DATA_PATH)

    print("Preprocessing data...")
    btc_df = preprocess_data(btc_data)
    eth_df = preprocess_data(eth_data)

    # Side-by-side join on the date index; columns become ('BTC', ...) / ('ETH', ...).
    combined_df = pd.concat([btc_df, eth_df], axis=1, keys=['BTC', 'ETH'])
    # NOTE(review): ffill() is applied to the whole frame, so a missing 'news'
    # cell would presumably inherit the previous row's value too -- confirm
    # that is intended.
    combined_df = combined_df.sort_index().ffill()

    combined_df['BTC', 'price_change'] = calculate_price_changes(combined_df['BTC'])
    combined_df['ETH', 'price_change'] = calculate_price_changes(combined_df['ETH'])

    print("Loading NLP model...")
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert", clean_up_tokenization_spaces=True)
    nlp_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    agent = FINMEMAgent(nlp_model, tokenizer)

    print("Preparing news data...")
    # Flatten all news into one list, date by date (BTC items first, then ETH),
    # so everything can be scored in batches up front.
    all_news = []
    for date in combined_df.index:
        btc_news = combined_df.loc[date, ('BTC', 'news')] if ('BTC', 'news') in combined_df.columns else []
        eth_news = combined_df.loc[date, ('ETH', 'news')] if ('ETH', 'news') in combined_df.columns else []
        # Anything that is not a list (e.g. a missing value) counts as "no news".
        btc_news = btc_news if isinstance(btc_news, list) else []
        eth_news = eth_news if isinstance(eth_news, list) else []
        all_news.extend(btc_news + eth_news)

    print("Processing news...")
    sentiments = process_news(agent, all_news)
    # Cursor into `sentiments`; advanced by each date's news count below.
    sentiment_index = 0

    print("Making trading decisions...")
    decisions = []
    portfolio = {'cash': 10000, 'BTC': 0, 'ETH': 0}
    for date in tqdm(combined_df.index, desc="Trading simulation"):
        try:
            # Must extract news exactly as the collection loop above did,
            # otherwise the cursor and the scored rows drift apart.
            btc_news = combined_df.loc[date, ('BTC', 'news')] if ('BTC', 'news') in combined_df.columns else []
            eth_news = combined_df.loc[date, ('ETH', 'news')] if ('ETH', 'news') in combined_df.columns else []
            btc_news = btc_news if isinstance(btc_news, list) else []
            eth_news = eth_news if isinstance(eth_news, list) else []

            news_count = len(btc_news) + len(eth_news)
            if news_count > 0:
                # Average the class probabilities of this date's news items.
                avg_sentiment = np.mean(sentiments[sentiment_index:sentiment_index+news_count], axis=0)
                sentiment_index += news_count
            else:
                avg_sentiment = np.array([0.33, 0.33, 0.33])  # Neutral sentiment if no news

            btc_price_change = combined_df.loc[date, ('BTC', 'price_change')]
            eth_price_change = combined_df.loc[date, ('ETH', 'price_change')]

            decision = agent.make_decision(avg_sentiment, btc_price_change, eth_price_change)

            btc_price = combined_df.loc[date, ('BTC', 'prices')]
            eth_price = combined_df.loc[date, ('ETH', 'prices')]

            if decision == "BUY" and portfolio['cash'] > 0:
                # Spend all cash, half on each asset, at this date's prices.
                btc_to_buy = (portfolio['cash'] / 2) / btc_price
                eth_to_buy = (portfolio['cash'] / 2) / eth_price
                portfolio['BTC'] += btc_to_buy
                portfolio['ETH'] += eth_to_buy
                portfolio['cash'] = 0
            elif decision == "SELL" and (portfolio['BTC'] > 0 or portfolio['ETH'] > 0):
                # Liquidate both positions at this date's prices.
                portfolio['cash'] += portfolio['BTC'] * btc_price + portfolio['ETH'] * eth_price
                portfolio['BTC'] = 0
                portfolio['ETH'] = 0

            # Mark the portfolio to market after any trade.
            portfolio_value = portfolio['cash'] + portfolio['BTC'] * btc_price + portfolio['ETH'] * eth_price

            decisions.append({
                'date': date,
                'decision': decision,
                'btc_price': btc_price,
                'eth_price': eth_price,
                'btc_price_change': btc_price_change,
                'eth_price_change': eth_price_change,
                'portfolio_value': portfolio_value
            })
            agent.update_memory({'date': date, 'sentiment': avg_sentiment})
        except Exception as e:
            # Best effort: report the failing date and carry on with the next one.
            print(f"Error processing date {date}: {e}")
            continue

    print("Analyzing results...")
    if not decisions:
        print("No trading decisions were made. Please check your data and decision-making logic.")
        return

    decisions_df = pd.DataFrame(decisions)
    print(decisions_df['decision'].value_counts())
    # Show at most 10 random rows; min() keeps sample() valid for short runs.
    print(decisions_df.sample(min(10, len(decisions_df))))

    initial_value = decisions_df['portfolio_value'].iloc[0]
    final_value = decisions_df['portfolio_value'].iloc[-1]
    overall_return = (final_value - initial_value) / initial_value * 100

    sharpe_ratio, max_drawdown = calculate_metrics(decisions_df)

    print(f"\nOverall Performance:")
    print(f"Initial Portfolio Value: ${initial_value:.2f}")
    print(f"Final Portfolio Value: ${final_value:.2f}")
    print(f"Total Return: {overall_return:.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    print(f"Maximum Drawdown: {max_drawdown:.2f}")

    print("Plotting results...")
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12, 6))
    plt.plot(decisions_df['date'], decisions_df['portfolio_value'])
    plt.title('Portfolio Value Over Time')
    plt.xlabel('Date')
    plt.ylabel('Portfolio Value ($)')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Run the simulation when this code is executed as the main program.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'pandas'

In [None]:
import json
from datetime import datetime
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")

# Locations of the BTC and ETH training datasets (JSON files)
BTC_DATA_PATH, ETH_DATA_PATH = (
    '/btc_train.json',
    '/eth_train.json',
)

def load_json_data(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's default
    # text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def preprocess_data(data):
    """Turn the raw loaded mapping into a date-indexed DataFrame.

    The mapping's top-level keys become the rows (parsed with
    ``pd.to_datetime``) and the 'prices' column is converted to float.

    Args:
        data: Mapping passed to ``pd.DataFrame`` and then transposed.
            Presumably date string -> record dict; verify against the files.

    Returns:
        pandas DataFrame with a datetime index and a float 'prices' column.
    """
    by_date = pd.DataFrame(data).transpose()
    by_date = by_date.set_axis(pd.to_datetime(by_date.index), axis=0)
    return by_date.assign(prices=by_date['prices'].astype(float))

def calculate_price_changes(df):
    """Return the row-over-row fractional change of ``df['prices']``.

    The first row has no predecessor, so its change is NaN.
    """
    price_series = df['prices']
    return price_series.pct_change()

class FINMEMAgent:
    """Sentiment-driven trading agent with a bounded memory of recent records.

    Attributes:
        nlp_model: Sequence-classification model used to score news text.
        tokenizer: Tokenizer applied to the news before it reaches the model.
        memory: List of the most recent records given to ``update_memory``
            (at most 100 entries).
        character: Always "adaptive"; no method of this class reads it.
        positive_index: Position of the "positive" class in a sentiment vector.
        negative_index: Position of the "negative" class in a sentiment vector.
    """

    # Ordering the class originally hard-coded; used whenever the model does
    # not publish a usable label map.
    positive_index = 1
    negative_index = 0

    def __init__(self, nlp_model, tokenizer):
        self.nlp_model = nlp_model
        self.tokenizer = tokenizer
        self.memory = []
        self.character = "adaptive"
        # Read the class order from the model instead of assuming it: models
        # differ in which output position means "positive".
        self.positive_index, self.negative_index = self._resolve_label_indices(nlp_model)

    @staticmethod
    def _resolve_label_indices(nlp_model):
        """Return (positive_index, negative_index) for ``nlp_model``.

        The indices come from ``nlp_model.config.id2label`` (label names are
        matched case-insensitively). If the model has no such mapping, or it
        lacks a "positive" or "negative" entry, the legacy ordering (1, 0)
        is returned.
        """
        config = getattr(nlp_model, "config", None)
        id2label = getattr(config, "id2label", None)
        if not isinstance(id2label, dict):
            return 1, 0
        by_name = {str(name).lower(): int(idx) for idx, name in id2label.items()}
        positive, negative = by_name.get("positive"), by_name.get("negative")
        if positive is None or negative is None:
            return 1, 0
        return positive, negative

    def process_news_batch(self, news_batch):
        """Score a batch of texts and return softmax probabilities as a NumPy array.

        Args:
            news_batch: Texts handed to the tokenizer in one call; they are
                padded together and truncated to 512 tokens.

        Returns:
            numpy.ndarray of probabilities, softmax taken over the last axis
            of the model's logits (presumably one row per text).
        """
        inputs = self.tokenizer(news_batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        # Inference only, so gradients are disabled.
        with torch.no_grad():
            outputs = self.nlp_model(**inputs)
        sentiments = torch.nn.functional.softmax(outputs.logits, dim=-1)
        return sentiments.numpy()

    def update_memory(self, data):
        """Append ``data`` to the memory, discarding the oldest entry beyond 100."""
        self.memory.append(data)
        if len(self.memory) > 100:
            self.memory.pop(0)

    def make_decision(self, sentiment, btc_price_change, eth_price_change):
        """Return "BUY", "SELL" or "HOLD" from sentiment and price moves.

        Args:
            sentiment: Sequence of class probabilities, indexed with
                ``positive_index`` / ``negative_index``.
            btc_price_change: Fractional BTC price change for the period.
            eth_price_change: Fractional ETH price change for the period.

        Returns:
            "BUY" when the positive probability exceeds 0.6 and either asset
            rose by more than 1%; "SELL" when the negative probability exceeds
            0.6 and either asset fell by more than 1%; otherwise "HOLD"
            (which is also the result when the price changes are NaN).
        """
        positive = sentiment[self.positive_index]
        negative = sentiment[self.negative_index]
        if positive > 0.6 and (btc_price_change > 0.01 or eth_price_change > 0.01):
            return "BUY"
        elif negative > 0.6 and (btc_price_change < -0.01 or eth_price_change < -0.01):
            return "SELL"
        return "HOLD"

def process_news(agent, news_list):
    """Score every item of ``news_list`` through ``agent`` in batches of 32.

    Args:
        agent: Object exposing ``process_news_batch(list_of_texts)``.
        news_list: Sequence of texts to score.

    Returns:
        numpy.ndarray built from the per-item results of all batches, in the
        order of ``news_list`` (an empty array when there is no news).
    """
    batch_size = 32
    batch_starts = range(0, len(news_list), batch_size)
    scored = []
    for start in tqdm(batch_starts, desc="Processing news"):
        batch_scores = agent.process_news_batch(news_list[start:start + batch_size])
        scored += list(batch_scores)
    return np.array(scored)

def calculate_metrics(decisions_df, periods_per_year=252):
    """Compute the annualised Sharpe ratio and maximum drawdown of a portfolio.

    Args:
        decisions_df: DataFrame with a 'portfolio_value' column, one row per
            period in chronological order.
        periods_per_year: Number of periods used to annualise the Sharpe
            ratio. Defaults to 252, the value that was previously hard-coded;
            pass a different count when the rows are not business days.

    Returns:
        tuple: ``(sharpe_ratio, max_drawdown)``. The Sharpe ratio uses a zero
        risk-free rate and is NaN when the period returns have no spread
        (constant portfolio value or fewer than three rows). The drawdown is
        the most negative value of ``value / running_peak - 1`` (0 when the
        portfolio never falls below a previous peak).
    """
    portfolio_values = decisions_df['portfolio_value']
    returns = portfolio_values.pct_change()
    sharpe_ratio = np.sqrt(periods_per_year) * returns.mean() / returns.std()
    max_drawdown = (portfolio_values / portfolio_values.cummax() - 1).min()
    return sharpe_ratio, max_drawdown

def _news_for_date(combined_df, date):
    """Return the BTC news followed by the ETH news stored for ``date``.

    A missing ('<asset>', 'news') column, or a cell that is not a list,
    contributes nothing.
    """
    headlines = []
    for asset in ('BTC', 'ETH'):
        column = (asset, 'news')
        cell = combined_df.loc[date, column] if column in combined_df.columns else []
        if isinstance(cell, list):
            headlines.extend(cell)
    return headlines


def main():
    """Run the BTC/ETH sentiment-driven trading simulation end to end.

    Steps, in order: load and preprocess both datasets, join them on date,
    compute per-asset price changes, score all news with the FinBERT
    checkpoint, replay the dates to make BUY/SELL/HOLD decisions against a
    10,000-cash starting portfolio, then print summary metrics and plot the
    portfolio value.

    Returns:
        None. Results are printed and plotted. Returns early (after a message)
        if no date produced a decision.
    """
    print("Loading data...")
    btc_data = load_json_data(BTC_DATA_PATH)
    eth_data = load_json_data(ETH_DATA_PATH)

    print("Preprocessing data...")
    btc_df = preprocess_data(btc_data)
    eth_df = preprocess_data(eth_data)

    # Side-by-side join on the date index; columns become ('BTC', ...) / ('ETH', ...).
    combined_df = pd.concat([btc_df, eth_df], axis=1, keys=['BTC', 'ETH'])
    # NOTE(review): ffill() is applied to the whole frame, so a missing 'news'
    # cell would presumably inherit the previous row's value too -- confirm
    # that is intended.
    combined_df = combined_df.sort_index().ffill()

    combined_df['BTC', 'price_change'] = calculate_price_changes(combined_df['BTC'])
    combined_df['ETH', 'price_change'] = calculate_price_changes(combined_df['ETH'])

    print("Loading NLP model...")
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert", clean_up_tokenization_spaces=True)
    nlp_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    agent = FINMEMAgent(nlp_model, tokenizer)

    print("Preparing news data...")
    # Flatten all news into one list, date by date, so everything can be
    # scored in batches up front.
    all_news = []
    for date in combined_df.index:
        all_news.extend(_news_for_date(combined_df, date))

    print("Processing news...")
    sentiments = process_news(agent, all_news)
    # Cursor into `sentiments`; advanced by each date's news count below.
    sentiment_index = 0

    print("Making trading decisions...")
    decisions = []
    portfolio = {'cash': 10000, 'BTC': 0, 'ETH': 0}
    for date in tqdm(combined_df.index, desc="Trading simulation"):
        try:
            # Same extraction as the collection loop above, which keeps the
            # cursor aligned with the scored rows.
            news_count = len(_news_for_date(combined_df, date))
            if news_count > 0:
                # Average the class probabilities of this date's news items.
                avg_sentiment = np.mean(sentiments[sentiment_index:sentiment_index+news_count], axis=0)
                sentiment_index += news_count
            else:
                avg_sentiment = np.array([0.33, 0.33, 0.33])  # Neutral sentiment if no news

            btc_price_change = combined_df.loc[date, ('BTC', 'price_change')]
            eth_price_change = combined_df.loc[date, ('ETH', 'price_change')]

            decision = agent.make_decision(avg_sentiment, btc_price_change, eth_price_change)

            btc_price = combined_df.loc[date, ('BTC', 'prices')]
            eth_price = combined_df.loc[date, ('ETH', 'prices')]

            if decision == "BUY" and portfolio['cash'] > 0:
                # Spend all cash, half on each asset, at this date's prices.
                btc_to_buy = (portfolio['cash'] / 2) / btc_price
                eth_to_buy = (portfolio['cash'] / 2) / eth_price
                portfolio['BTC'] += btc_to_buy
                portfolio['ETH'] += eth_to_buy
                portfolio['cash'] = 0
            elif decision == "SELL" and (portfolio['BTC'] > 0 or portfolio['ETH'] > 0):
                # Liquidate both positions at this date's prices.
                portfolio['cash'] += portfolio['BTC'] * btc_price + portfolio['ETH'] * eth_price
                portfolio['BTC'] = 0
                portfolio['ETH'] = 0

            # Mark the portfolio to market after any trade.
            portfolio_value = portfolio['cash'] + portfolio['BTC'] * btc_price + portfolio['ETH'] * eth_price

            decisions.append({
                'date': date,
                'decision': decision,
                'btc_price': btc_price,
                'eth_price': eth_price,
                'btc_price_change': btc_price_change,
                'eth_price_change': eth_price_change,
                'portfolio_value': portfolio_value
            })
            agent.update_memory({'date': date, 'sentiment': avg_sentiment})
        except Exception as e:
            # Best effort: report the failing date and carry on with the next one.
            print(f"Error processing date {date}: {e}")
            continue

    print("Analyzing results...")
    # Without any recorded decision the frame below would have no 'decision'
    # column, so stop with an explanation instead of failing on the lookup.
    if not decisions:
        print("No trading decisions were made. Please check your data and decision-making logic.")
        return

    decisions_df = pd.DataFrame(decisions)
    print(decisions_df['decision'].value_counts())
    # sample() raises when asked for more rows than exist, so cap the size.
    print(decisions_df.sample(min(10, len(decisions_df))))

    initial_value = decisions_df['portfolio_value'].iloc[0]
    final_value = decisions_df['portfolio_value'].iloc[-1]
    overall_return = (final_value - initial_value) / initial_value * 100

    sharpe_ratio, max_drawdown = calculate_metrics(decisions_df)

    print(f"\nOverall Performance:")
    print(f"Initial Portfolio Value: ${initial_value:.2f}")
    print(f"Final Portfolio Value: ${final_value:.2f}")
    print(f"Total Return: {overall_return:.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    print(f"Maximum Drawdown: {max_drawdown:.2f}")

    print("Plotting results...")
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12, 6))
    plt.plot(decisions_df['date'], decisions_df['portfolio_value'])
    plt.title('Portfolio Value Over Time')
    plt.xlabel('Date')
    plt.ylabel('Portfolio Value ($)')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Run the simulation when this code is executed as the main program.
if __name__ == "__main__":
    main()

Loading data...
Preprocessing data...
Loading NLP model...
Preparing news data...
Processing news...


Processing news:  17%|█▋        | 4/24 [00:34<02:46,  8.32s/it]