In [None]:
import os
import pandas as pd
from flask import Flask, request, redirect, url_for, render_template_string
from pymongo import MongoClient

# Flask application object and the two working directories it writes into:
# raw uploads go to UPLOAD_FOLDER, de-duplicated copies go to CLEANED_FOLDER.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
app.config['CLEANED_FOLDER'] = 'cleaned_data/'

# Ensure the upload and cleaned data folders exist.
# exist_ok=True makes creation idempotent (safe to re-run this cell) and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['CLEANED_FOLDER'], exist_ok=True)

# Jinja template (kept inline as a string) used for both the landing page and
# the post-upload summary.
#   - The form POSTs a multipart upload to /upload under the field name "file".
#   - When the `data` variable is passed, a table is rendered with one row per
#     item, reading row['email'] and row['password'].
#   - When `cleaned_file` is also passed, it is rendered as a hyperlink.
# NOTE(review): no route visible in this file serves the `cleaned_file` path,
# so that link may not resolve in the browser — confirm.
html_template = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Upload Leaked Data</title>
</head>
<body>
    <h1>Upload Leaked Data</h1>
    <form action="/upload" method="post" enctype="multipart/form-data">
        <label for="file">Choose text file:</label>
        <input type="file" id="file" name="file" accept=".txt">
        <button type="submit">Upload</button>
    </form>
    {% if data %}
    <h2>Data Summary</h2>
    <table border="1">
        <tr>
            <th>Email</th>
            <th>Password</th>
        </tr>
        {% for row in data %}
        <tr>
            <td>{{ row['email'] }}</td>
            <td>{{ row['password'] }}</td>
        </tr>
        {% endfor %}
    </table>
    {% if cleaned_file %}
    <p>Cleaned data saved to: <a href="{{ cleaned_file }}" target="_blank">{{ cleaned_file }}</a></p>
    {% endif %}
    {% endif %}
</body>
</html>
'''

@app.route('/')
def index():
    """Serve the upload page.

    No template variables are passed, so only the upload form is rendered
    (the summary table is guarded by ``{% if data %}`` in the template).
    """
    landing_page = render_template_string(html_template)
    return landing_page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded ``.txt`` file, process it and render the summary page.

    Reads the multipart field ``file`` from the request, saves it under
    ``app.config['UPLOAD_FOLDER']`` and passes the saved path to
    ``process_file``.

    Returns:
        - a redirect to the index page when no file was supplied;
        - the rendered template with ``data`` and ``cleaned_file`` on success;
        - a plain-text message with HTTP 400 for a non-``.txt`` file name;
        - a plain-text message with HTTP 500 when saving or processing fails.
    """
    try:
        uploaded = request.files.get('file')

        # This route only accepts POST, so redirecting to request.url would
        # make the browser issue GET /upload and receive 405. Send the user
        # back to the upload form instead.
        if uploaded is None or not uploaded.filename:
            return redirect(url_for('index'))

        # The file name is supplied by the client. Keep only its final path
        # component (treating both '/' and '\\' as separators) so a name such
        # as '../../x.txt' cannot write outside the upload folder.
        safe_name = os.path.basename(uploaded.filename.replace('\\', '/'))

        if not safe_name.endswith('.txt'):
            return 'Invalid file format. Please upload a .txt file.', 400

        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        uploaded.save(filepath)

        # Process the file; rendering stays inside this try so template errors
        # are reported with the same "file processing" message as before.
        try:
            data, cleaned_file_path = process_file(filepath)
            return render_template_string(html_template, data=data, cleaned_file=cleaned_file_path)
        except Exception as e:
            return f"An error occurred during file processing: {e}", 500
    except Exception as e:
        return f"An error occurred during file upload: {e}", 500

def process_file(filepath):
    """Load a colon-separated ``email:password`` file, clean it and persist it.

    Steps:
        1. Parse ``filepath`` into a two-column DataFrame (``email``, ``password``).
        2. Drop rows with a missing field, then drop exact duplicate rows.
        3. Write the result to ``CLEANED_FOLDER/cleaned_<original file name>``
           (colon-separated, with a header row, no index column).
        4. Hand the cleaned frame to ``store_in_mongodb``.

    Args:
        filepath: Path of the uploaded text file.

    Returns:
        tuple: ``(records, cleaned_file_path)`` where ``records`` is a list of
        ``{'email': ..., 'password': ...}`` dicts and ``cleaned_file_path`` is
        the path of the cleaned file that was written.

    Raises:
        Exception: any error from parsing, writing or storing is printed and
        re-raised unchanged.
    """
    try:
        print(f"Processing file: {filepath}")
        # Read every field as text. With pandas' default type inference a
        # password like "007" becomes the integer 7, and literal values such
        # as "NA" or "null" are treated as missing and would be dropped below.
        # Only genuinely empty fields are considered missing.
        # NOTE(review): lines containing more than one ':' are not handled
        # specially here — confirm whether passwords may contain ':'.
        raw = pd.read_csv(
            filepath,
            sep=':',
            header=None,
            names=['email', 'password'],
            dtype=str,
            keep_default_na=False,
            na_values=[''],
        )
        print("Data loaded into DataFrame:")
        # Report only the size: printing rows would copy plaintext credentials
        # into the server log / notebook output.
        print(f"{len(raw)} rows")

        # Clean the data (handle missing values, drop duplicates) without
        # mutating the freshly loaded frame.
        cleaned = raw.dropna().drop_duplicates()

        # Save the cleaned data to a new file
        cleaned_file_path = os.path.join(app.config['CLEANED_FOLDER'], f"cleaned_{os.path.basename(filepath)}")
        cleaned.to_csv(cleaned_file_path, index=False, sep=':')

        # Store the data in MongoDB
        store_in_mongodb(cleaned)
        return cleaned.to_dict('records'), cleaned_file_path
    except Exception as e:
        print(f"Error processing file: {e}")
        raise

def store_in_mongodb(data):
    """Insert every row of ``data`` into the ``leaked_data.leaks`` collection.

    Connects to the MongoDB server at ``mongodb://localhost:27017/`` and
    inserts one document per DataFrame row (``data.to_dict('records')``).

    Args:
        data: pandas DataFrame whose rows are to be stored.

    Raises:
        Exception: any connection or insert error is printed and re-raised.
    """
    try:
        records = data.to_dict('records')
        if not records:
            # insert_many() rejects an empty document list, which would turn
            # an upload with no valid rows into an error. Nothing to store.
            print("No rows to insert into MongoDB")
            return

        # The context manager closes the client (and its connection pool)
        # when done, instead of leaking one client per upload.
        with MongoClient('mongodb://localhost:27017/') as client:
            collection = client['leaked_data']['leaks']
            collection.insert_many(records)
        print("Data inserted into MongoDB")
    except Exception as e:
        print(f"Error storing data in MongoDB: {e}")
        raise

# Start Flask's built-in server when this code runs as __main__.
# app.run() does not return until the server is stopped (the captured output
# shows "Press CTRL+C to quit"), so nothing after this call runs meanwhile.
# use_reloader=False turns the auto-reloader off.
# NOTE(review): debug=True enables Flask debug mode ("Debug mode: on" in the
# output) — presumably intended for local development only; confirm before
# exposing this server beyond localhost.
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [10/Aug/2024 17:57:42] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [10/Aug/2024 17:57:42] "GET /favicon.ico HTTP/1.1" 404 -


Processing file: uploads/leakdataset.txt
Data loaded into DataFrame:
                  email  password
0  tana_2001-10@mail.ru    dragon
1       loki_98@mail.ru    monkey
2      olgagold07@bk.ru  football
3      detka_zu@mail.ru  1234589a
4  vova_sokolik@mail.ru   volokos


127.0.0.1 - - [10/Aug/2024 17:57:48] "POST /upload HTTP/1.1" 200 -


Data inserted into MongoDB


In [None]:
from threading import Thread

def run_flask():
    """Run the Flask server; intended to be used as a thread target.

    Blocks for as long as the server is running.
    """
    app.run(debug=True, use_reloader=False)

# Start Flask in a separate thread so this cell returns immediately.
# daemon=True: the server loop never returns on its own, so a non-daemon
# thread would keep the interpreter from exiting (e.g. a non-interactive
# "run all" of the notebook would hang). A daemon thread is stopped
# automatically when the main program ends.
flask_thread = Thread(target=run_flask, daemon=True)
flask_thread.start()