This code defines helper functions for preprocessing text, scoring sentiment, and retrieving the dataset articles most similar to a user-supplied text (TF-IDF with cosine similarity), together with a function for training and evaluating an SVM model for a given pair of hyperparameters. The driver code prompts for a text, prints the most similar articles with their source and URL, and then calls run_svm for every combination of the C values (0.1, 1, 10, 100) and kernels ('linear', 'poly', 'rbf', 'sigmoid') returned by svm_analysis_params. For each combination it prints the accuracy, precision, recall, and F1-score results.

In [None]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.stem import PorterStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def preprocess_text(text):
    """Normalise raw text into a space-separated string of stemmed tokens.

    The input is coerced with ``str``, every character that is not an ASCII
    letter is replaced by a space, the result is lower-cased and split on
    whitespace, and each token is run through a Porter stemmer before the
    tokens are joined back together with single spaces.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', str(text))
    tokens = letters_only.lower().split()
    stemmer = PorterStemmer()
    return ' '.join(map(stemmer.stem, tokens))

def get_polarity_score(text):
    """Return the ``'compound'`` entry of the VADER polarity scores for ``text``."""
    analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(text)['compound']

def get_similar_articles(user_text, n_similar=10):
    """Return the dataset articles most similar to ``user_text``.

    The articles are read from ``crypto_news_dataset.csv``; rows containing
    any missing value are dropped. Both the articles and ``user_text`` are
    normalised with ``preprocess_text``, vectorised with TF-IDF (vocabulary
    capped at 10000 features), and ranked by cosine similarity.

    Args:
        user_text: The query text to compare against the dataset.
        n_similar: Maximum number of articles to return.

    Returns:
        A list of ``[text, source, url]`` lists, ordered from most to least
        similar, containing at most ``n_similar`` entries.
    """
    df = pd.read_csv('crypto_news_dataset.csv')
    df.dropna(inplace=True)
    df['processed_text'] = df['text'].apply(preprocess_text)

    tfidf = TfidfVectorizer(max_features=10000)
    # fit_transform vectorises the corpus once; the resulting matrix is
    # reused below instead of transforming the whole corpus a second time.
    corpus_tfidf = tfidf.fit_transform(df['processed_text'])
    user_tfidf = tfidf.transform([preprocess_text(user_text)])

    # cosine_similarity returns one row per query; there is a single query.
    df['similarity'] = cosine_similarity(user_tfidf, corpus_tfidf)[0]
    df.sort_values(by='similarity', ascending=False, inplace=True)
    return df[['text', 'source', 'url']].head(n_similar).values.tolist()

def svm_analysis_params():
    """Return the SVM hyperparameter grid.

    Returns:
        A new dict on every call with two keys: ``'C'`` (candidate
        regularisation values) and ``'kernel'`` (candidate kernel names).
    """
    return {
        'C': [0.1, 1, 10, 100],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    }

def run_svm(C, kernel, label_column='label'):
    """Train an SVM text classifier and evaluate it on a held-out split.

    The articles are read from ``crypto_news_dataset.csv``; rows containing
    any missing value are dropped. The ``text`` column is normalised with
    ``preprocess_text`` and split 80/20 (``random_state=42``). TF-IDF
    (vocabulary capped at 10000 features) is fitted on the training split
    only and then applied to the test split.

    Args:
        C: Regularisation parameter passed to ``SVC``.
        kernel: Kernel name passed to ``SVC``.
        label_column: Name of the dataframe column holding the class labels.
            NOTE(review): the default ``'label'`` is an assumption about the
            CSV schema -- confirm against the dataset. The metrics below use
            ``pos_label='real'``, so the column is expected to contain the
            value ``'real'`` as one of two classes.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)`` computed on the test
        split, with ``'real'`` treated as the positive class.

    Raises:
        KeyError: If ``label_column`` is not a column of the dataset.
    """
    from sklearn.svm import SVC

    df = pd.read_csv('crypto_news_dataset.csv')
    df.dropna(inplace=True)
    if label_column not in df.columns:
        raise KeyError(
            f"label column {label_column!r} not found in dataset; "
            f"available columns: {list(df.columns)}"
        )

    processed_text = df['text'].apply(preprocess_text)
    # The target must be the class label, not the article text itself:
    # using the text as the target makes every document its own class and
    # the pos_label='real' metrics below cannot be computed.
    y = df[label_column]

    text_train, text_test, y_train, y_test = train_test_split(
        processed_text, y, test_size=0.2, random_state=42
    )

    # Fit the vectoriser on the training split only so that vocabulary and
    # IDF statistics from the test split do not leak into the model.
    tfidf = TfidfVectorizer(max_features=10000)
    X_train = tfidf.fit_transform(text_train)
    X_test = tfidf.transform(text_test)

    clf = SVC(C=C, kernel=kernel)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, pos_label='real')
    recall = recall_score(y_test, y_pred, pos_label='real')
    f1 = f1_score(y_test, y_pred, pos_label='real')
    return accuracy, precision, recall, f1

def svm_driver():
    """Interactive entry point: similar-article lookup followed by an SVM sweep.

    Prompts for a text on standard input, prints the similar articles
    returned by ``get_similar_articles`` (text, source and URL for each),
    and then prints the accuracy, precision, recall and F1-score returned by
    ``run_svm`` for every ``C`` / ``kernel`` pair in the grid from
    ``svm_analysis_params``.
    """
    user_text = input("Enter a text for testing: ")
    matches = get_similar_articles(user_text)

    print(f"Similar articles for '{user_text}':")
    for i, article in enumerate(matches):
        print(
            f"{i+1}. {article[0]}",
            f"Source: {article[1]}",
            f"URL: {article[2]}",
            '\n',
            sep='\n',
        )
    print("\n")

    grid = svm_analysis_params()
    # Same ordering as a nested loop: every kernel is tried for each C in turn.
    combinations = [(C, kernel) for C in grid['C'] for kernel in grid['kernel']]
    for C, kernel in combinations:
        accuracy, precision, recall, f1 = run_svm(C, kernel)
        print(f"\nSVM Results for C={C}, kernel={kernel}:")
        print(f"Accuracy: {accuracy}")
        print(f"Precision: {precision}")
        print(f"Recall: {recall}")
        print(f"F1-score: {f1}\n")
# Run the interactive driver only when this code is executed directly (as a
# script or a notebook cell), so importing the module does not prompt for input.
if __name__ == "__main__":
    svm_driver()


Enter a text for testing: Walmart and Litecoin Payment News Debunked by Walmart Spokesperson, LTC Prices Shudder from Fake News
Similar articles for 'Walmart and Litecoin Payment News Debunked by Walmart Spokesperson, LTC Prices Shudder from Fake News':
1. 3 weeks ago, I had the pleasure of writing an article I’d dreamed of writing for months and months on end. Gyft Adds Walmart Gift Cards!! Alas, all good things are not meant to last. For the last 3 weeks, Bitcoiners around the country have had the pleasure of buying Walmart gift cards with Bitcoin, receiving 3% back in the form of Gyft points, and in essence spending Bitcoin at Walmart on gas and groceries. Making Bitcoin a cheaper option to buy gas has been a long standing dream of the Bitcoin community, this was the first step towards realizing that dream and said step has now been reversed. Despite the loss of Walmart, Gyft is geared to provide more to smaller businesses with the launch of Gyft Cloud. Gyft informed customers via e