# Flirting Detection

### Creating Required Audio files for Testing
The code below is commented out because the audio files have already been uploaded to ProofHub.

In [14]:
# pip install gtts


#### No Flirting Audio

In [15]:
# from gtts import gTTS

# sentences = [
#     "When is the due date for this task?",
#     "Could you repeat the statement?",
#     "Could you help me with this document?"
# ]


# text = " ".join(sentences)


# tts = gTTS(text, lang='en')


# audio_file = "no_flirting.wav"
# tts.save(audio_file)

# print(f"Audio file '{audio_file}' has been created.")


#### Flirting Audio

In [16]:
# from gtts import gTTS

# sentences = [
#     "You have a beautiful smile.",
#     "Can you tell me more about yourself?",
#     "I love your eyes.",
#     "What a lovely dress you have on.",
#     "Could you help me with this document?"
# ]


# text = " ".join(sentences)

# tts = gTTS(text, lang='en')

# audio_file = "flirting_audio.mp3"
# tts.save(audio_file)

# print(f"Audio file '{audio_file}' has been created.")


#### Installing Required packages

In [17]:
pip install SpeechRecognition pydub nltk pandas scikit-learn


Note: you may need to restart the kernel to use updated packages.


#### Importing Required libraries

In [18]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import json
from pymongo import MongoClient
import speech_recognition as sr
from pydub import AudioSegment
import os


#### Function to convert audio to text

In [19]:

def audio_to_text(audio_file):
    """Transcribe an audio file to text using Google's Web Speech API.

    The input is decoded with pydub and re-encoded as WAV into an in-memory
    buffer, which is then read by SpeechRecognition. Converting in memory
    avoids writing a fixed ``temp.wav`` into the working directory, which
    would be left behind after the call and overwritten by any other call
    running at the same time.

    Args:
        audio_file: Path of the audio file; passed to ``AudioSegment.from_file``.

    Returns:
        The recognized text, or an empty string when the speech could not be
        understood or the recognition request failed.
    """
    import io
    import warnings

    recognizer = sr.Recognizer()

    # Re-encode to WAV entirely in memory.
    wav_buffer = io.BytesIO()
    AudioSegment.from_file(audio_file).export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    with sr.AudioFile(wav_buffer) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The audio contained no intelligible speech.
        return ""
    except sr.RequestError as exc:
        # Keep the best-effort contract (empty string), but make the failure
        # visible so it is not mistaken for silent audio.
        warnings.warn(f"Speech recognition request failed: {exc}")
        return ""





#### Text preprocessing function

In [20]:

def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmas.

    Steps: lowercase the input, strip every character that is not a letter,
    digit or whitespace, tokenize, drop English stop words, and lemmatize
    the remaining tokens.

    Args:
        text: The string to normalize.

    Returns:
        The lemmatized, stop-word-free tokens joined by single spaces.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
    words = word_tokenize(cleaned)

    english_stop_words = set(stopwords.words('english'))
    wordnet_lemmatizer = WordNetLemmatizer()

    lemmas = []
    for token in words:
        if token in english_stop_words:
            continue
        lemmas.append(wordnet_lemmatizer.lemmatize(token))

    return ' '.join(lemmas)

#### Function to save prediction to JSON

In [21]:

def save_prediction_to_json(prediction_result, json_file="flirting_output.json"):
    """Append a prediction record to a JSON file holding a list of records.

    The file is created when it does not exist. An existing file that is
    empty (or whitespace only) is treated as having no records, and an
    existing file holding a single JSON value that is not a list is wrapped
    into a list so the new record can be appended to it.

    Args:
        prediction_result: JSON-serializable record to append.
        json_file: Path of the JSON file to update.

    Raises:
        json.JSONDecodeError: If the existing file has non-empty content that
            is not valid JSON. The file is left untouched in that case.
    """
    records = []
    if os.path.isfile(json_file):
        with open(json_file, 'r', encoding='utf-8') as file:
            raw_content = file.read()
        if raw_content.strip():
            existing = json.loads(raw_content)
            # Accept a lone object as a one-element history.
            records = existing if isinstance(existing, list) else [existing]

    records.append(prediction_result)
    with open(json_file, 'w', encoding='utf-8') as file:
        json.dump(records, file, indent=4)





#### Function to load JSON data into MongoDB

In [22]:

def load_json_to_mongodb(json_file_path, db_name, collection_name, mongo_uri="mongodb://localhost:27017"):
    """Insert the contents of a JSON file into a MongoDB collection.

    A JSON list is inserted with ``insert_many`` (one document per element);
    any other JSON value is inserted with ``insert_one``. An empty list is
    skipped, because ``insert_many`` rejects an empty document list.

    Args:
        json_file_path: Path of the JSON file to import.
        db_name: Name of the target database.
        collection_name: Name of the target collection.
        mongo_uri: MongoDB connection URI.
    """
    # Read the file first so a missing or invalid file fails before a client is created.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    if isinstance(data, list) and not data:
        print(f"No records found in {json_file_path}; nothing was imported.")
        return

    client = MongoClient(mongo_uri)
    try:
        collection = client[db_name][collection_name]
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)
    finally:
        # Always release the connection, even when the insert fails.
        client.close()

    print(f"Data from {json_file_path} has been successfully imported into the {db_name}.{collection_name} collection.")

#### Preprocessing and Model Training

In [32]:
# Make sure the NLTK resources used by preprocess_text (tokenizer models,
# stop-word list, WordNet) are present so the cell runs on a fresh environment.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource, quiet=True)

# Loading the data from CSV; rows without text or label cannot be used for training
data = pd.read_csv('flirting_detection_dataset.csv').dropna(subset=['Text', 'Label'])

# Extracting texts and labels from the DataFrame
texts = data['Text'].astype(str).tolist()
labels = data['Label'].tolist()

# Preprocessing texts
preprocessed_texts = [preprocess_text(text) for text in texts]
y = labels

# Train-test split on the preprocessed texts, BEFORE feature extraction, so the
# TF-IDF vocabulary and IDF weights are learned from the training portion only
# (fitting the vectorizer on all rows leaks test data into the features).
texts_train, texts_test, y_train, y_test = train_test_split(
    preprocessed_texts, y, test_size=0.2, random_state=42
)

# Feature extraction
vectorizer = TfidfVectorizer(ngram_range=(1, 3))
X_train = vectorizer.fit_transform(texts_train)
X_test = vectorizer.transform(texts_test)

# Full feature matrix, kept under its original name for any cell that still uses X
X = vectorizer.transform(preprocessed_texts)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
print(classification_report(y_test, y_pred))



#### Function to predict flirting from audio input

In [24]:

def predict_flirting(input_audio):
    """Transcribe an audio file and classify the transcript as flirting or not.

    Uses the module-level ``vectorizer`` and ``model`` fitted in the training
    cell, so that cell must be run first.

    Args:
        input_audio: Path of the audio file to analyze.

    Returns:
        A dict with the raw transcript under ``"Text"`` and either
        ``'Flirting Detected'`` or ``'No Flirting Detected'`` under
        ``"Prediction"``.
    """
    text = audio_to_text(input_audio)
    processed_text = preprocess_text(text)

    if not processed_text:
        # Nothing usable was transcribed (audio_to_text returns "" on failure,
        # or every token was removed). Classifying an empty string would give
        # a label not based on any words, so report no flirting explicitly.
        prediction_text = 'No Flirting Detected'
    else:
        X_input = vectorizer.transform([processed_text])
        # predict returns one label per input row; take the single label.
        predicted_label = model.predict(X_input)[0]
        prediction_text = 'Flirting Detected' if predicted_label == 1 else 'No Flirting Detected'

    return {
        "Text": text,
        "Prediction": prediction_text
    }



#### Usage

In [25]:

# Run the whole pipeline on the sample recording and show the result
audio_file = 'flirting_audio.wav'
prediction_result = predict_flirting(input_audio=audio_file)
print(prediction_result)

# Append this prediction to the JSON history file
save_prediction_to_json(prediction_result, json_file="flirting_output.json")

# Import the JSON history file into MongoDB.
# NOTE(review): the JSON file accumulates one record per run and every record
# in it is inserted here, so re-running this cell inserts the earlier
# predictions again.
load_json_to_mongodb(
    json_file_path="flirting_output.json",
    db_name='flirting_db',
    collection_name='flirting_predictions',
)

{'Text': 'you have a beautiful smile can you tell me more about yourself I love your eyes what a lovely dress you have on could you help me with this document', 'Prediction': 'Flirting Detected'}
Data from flirting_output.json has been successfully imported into the flirting_db.flirting_predictions collection.
