In [3]:
import snscrape.modules.twitter as sntwitter
import re
from textblob import TextBlob
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Fonction pour analyser les sentiments avec TextBlob
def analyze_sentiment(text):
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity  # Retourne une valeur entre -1 (négatif) et 1 (positif)
    
    if sentiment > 0:
        return 'Positif'
    elif sentiment < 0:
        return 'Négatif'
    else:
        return 'Neutre'

# Fonction pour identifier le sujet avec Latent Dirichlet Allocation (LDA)
def identify_topics(texts, n_topics=2, n_top_words=5):
    vectorizer = CountVectorizer(stop_words='english')
    dtm = vectorizer.fit_transform(texts)
    
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=0)
    lda.fit(dtm)
    
    topics = []
    for idx, topic in enumerate(lda.components_):
        terms = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_top_words - 1:-1]]
        topics.append("Topic %d: %s" % (idx, ", ".join(terms)))
    return topics

tweet_content = "France is in europe"

print("Contenu du tweet : ", tweet_content)

# Analyse des sentiments
sentiment = analyze_sentiment(tweet_content)
print("Sentiment détecté : ", sentiment)

# Identification du sujet
topics = identify_topics([tweet_content])
print("Sujets identifiés :")
for topic in topics:
    print(topic)


Contenu du tweet :  France is in europe
Sentiment détecté :  Neutre
Sujets identifiés :
Topic 0: france, europe
Topic 1: europe, france


In [None]:
from langchain_openai import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SimpleSequentialChain
import sys

def fact_check(question):
    llm = OpenAI(temperature=0.7)
    template = """{question}\n\n"""
    prompt_template = PromptTemplate(input_variables=["question"], template=template)
    question_chain = LLMChain(llm=llm, prompt=prompt_template)

    template = """Here is a statement:
    {statement}
    Make a bullet point list of the assumptions you made when producing the above statement.\n\n"""
    prompt_template = PromptTemplate(input_variables=["statement"], template=template)
    assumptions_chain = LLMChain(llm=llm, prompt=prompt_template)

    template = """Here is a bullet point list of assertions:
    {assertions}
    For each assertion, determine whether it is true or false. If it is false, explain why.\n\n"""
    prompt_template = PromptTemplate(input_variables=["assertions"], template=template)
    fact_checker_chain = LLMChain(llm=llm, prompt=prompt_template)

    template = """In light of the above facts, how would you answer the question '{}'""".format(question)
    template = """{facts}\n""" + template
    prompt_template = PromptTemplate(input_variables=["facts"], template=template)
    answer_chain = LLMChain(llm=llm, prompt=prompt_template)

    overall_chain = SimpleSequentialChain(chains=[question_chain, assumptions_chain, fact_checker_chain, answer_chain], verbose=True)

    return overall_chain.run(question)

if __name__=="__main__":
    if len(sys.argv) > 1:
        question = sys.argv[1]
    else:
        question = "What type of mammal lays the biggest eggs?"
    print(question)
    answer = fact_check(question)
    print(answer)

--f=c:\Users\antoi\AppData\Roaming\jupyter\runtime\kernel-v334fb50e8befa2e6760c5e7d03bfeecaf360d29f5.json


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [25]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

# Charger le tokenizer et le modèle pré-entraîné
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Créer un pipeline de classification
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Exemple de déclaration à vérifier
declaration = "Mickael Jackson is alive"

# Classification de la déclaration
result = classifier(declaration)

# Afficher le résultat

if result[0]['score'] < 0.5:
    print("False") 
else: 
    print("True")
print(result[0]['score'])


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


True
0.7007076740264893
