# Initializing

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Importing Libraries
</div>

In [6]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch
import re, os, json, csv
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, NLTKTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer 
from langchain.embeddings import GPT4AllEmbeddings
import os
import shutil
import spacy
import pandas as pd
import re
import textwrap

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Define Query & Search Type
</div>

In [7]:
# searchType selects which branch of the pipeline runs further down:
# "Product", "Main" or "Seller" (it is compared case-insensitively).
# Exactly one searchType/query pair should be active; the commented-out
# pairs below are alternative example inputs.
searchType = "Product"
query = "Can you list watches between Rs. 1000 and Rs. 2000?"
# # query = "Can you find wall chargers that always ship on time?"
# query = "Show me tablet accessories with more than 90% positive ratings."

# searchType = "Main"
# # query = "What are your refund policies?"
# query = "What are discount option available for HBL card users?"

# searchType = "Seller"
# query = "Can I make product bundles on Daraz?"
# # query = "What are your refund policies?"

# Remove the word "Daraz" (any case) and the whitespace after it from the
# query, except for the one query that is literally about Daraz itself.
if query  != "What is Daraz?":
    query = re.sub(r'\bDaraz\b\s*', '', query, flags=re.IGNORECASE)
    
# Chunk size handed to every text splitter in the retriever phase.
chunkSize = 1500

In [8]:
# Delete the cleaned / summarised output files of a previous run, if present.
# NOTE(review): later cells write 'outputCleaned.txt' and
# 'outputCleanedSummarised.txt' with relative names - confirm these absolute
# paths point at the directory the notebook actually runs in.
_stale_outputs = (
    "/Users/moiz/Moiz/Github/ITA-Project/outputCleaned.txt",
    "/Users/moiz/Library/CloudStorage/OneDrive-InstituteofBusinessAdministration/IBA/6th - Spring 2024/ITA/Project/outputCleanedSummarised.txt",
)
for _stale_path in _stale_outputs:
    if os.path.exists(_stale_path):
        os.remove(_stale_path)

# Collecting Data & Preprocessing

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Define Required Functions
</div>

In [9]:
# spaCy's small English pipeline, loaded once; normalize_subjects and
# find_subject_in_query use it to lemmatize subjects and queries.
nlp = spacy.load('en_core_web_sm')

# Product categories that find_subject_in_query can recognise in a query.
# In "Product" mode the matched name (spaces replaced by hyphens) selects the
# sub-folder of Data/Products/ whose files are read.
subjects = [
    "Phone Cases", "Power Banks", "iPhone Cables", "Android Cables", "Wall Chargers",
    "Wireless Chargers", "Tablet Accessories", "Car Chargers", "Screen Protectors",
    "Phone Camera Flash", "Lights", "Selfie Sticks", "Bluetooth Headphones",
    "Wireless Earbuds", "Mono Headsets", "Headphones", "Wired Headsets", "Smartwatches",
    "Fitness", "Trackers", "Fitness Tracker", "Virtual Reality", "Memory Cards",
    "Lenses", "Tripods", "Monopods", "Camera Cases", "Camera", "Gimbals", "Batteries",
    "Cooling Pads", "Keyboards", "Watches"
]

# Header row written to FinalProductsList.csv; the order matches the list
# returned by parse_line for each product line.
headers = [
    "Product Number", "Product Name", "Product Category", "Brand Name", "Seller Name", 
    "Price Details", "Positive Seller Ratings", "Ship on Time", "Return Policy"
]

def is_paragraph_break(line):
    """Return True when *line* holds nothing but whitespace."""
    return not line.strip()

def is_unwanted_line(line):
    """Return True when the last non-whitespace character of *line* is a colon."""
    trimmed = line.strip()
    return trimmed[-1:] == ":"

def process_files(folder_path, output_file):
    """Merge the paragraphs of every ``.txt`` file in a folder into one file.

    Each input file is read as UTF-8. Lines ending in a colon are dropped,
    blank lines separate paragraphs, and the remaining lines of a paragraph
    are stripped and joined with single spaces. Paragraphs shorter than 100
    characters are discarded; the rest are written to *output_file*, one
    paragraph per line.

    Args:
        folder_path: Directory that contains the ``.txt`` source files.
        output_file: Path of the UTF-8 text file to create or overwrite.
    """
    paragraphs = []

    for name in os.listdir(folder_path):
        if not name.endswith('.txt'):
            continue

        pending = []
        with open(os.path.join(folder_path, name), 'r', encoding='utf-8') as source:
            for raw_line in source:
                if is_unwanted_line(raw_line):
                    continue
                if not is_paragraph_break(raw_line):
                    pending.append(raw_line.strip())
                elif pending:
                    # Blank line: close the paragraph collected so far.
                    paragraphs.append(" ".join(pending))
                    pending = []

        # A file that does not end with a blank line still has an open paragraph.
        if pending:
            paragraphs.append(" ".join(pending))

    long_enough = [paragraph for paragraph in paragraphs if len(paragraph) >= 100]

    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.write("\n".join(long_enough))

def normalize_subjects(subjects):
    """Map each subject's lemmatized, hyphen-joined form to its original name.

    Every subject is lower-cased and run through the module-level spaCy
    pipeline ``nlp``; the token lemmas joined with ``-`` become the key.
    When two subjects normalize to the same key the later one wins.
    """
    return {
        '-'.join(token.lemma_ for token in nlp(subject.lower())): subject
        for subject in subjects
    }

def find_subject_in_query(query, subjects):
    """Return the first subject whose normalized form occurs in the query.

    The query is lower-cased, lemmatized with ``nlp`` and joined with ``-``,
    then each normalized subject is tested as a plain substring of it, in
    the order of *subjects*. Returns the subject's original spelling, or the
    string "No subject found" when nothing matches.

    NOTE(review): because the first substring hit wins, a short subject that
    is listed before a longer one containing it shadows the longer one.
    """
    candidates = normalize_subjects(subjects)
    lemma_query = '-'.join(token.lemma_ for token in nlp(query.lower()))

    return next(
        (original for normalized, original in candidates.items()
         if normalized in lemma_query),
        "No subject found",
    )

def read_product_files(directory):
    """Load every product ``.txt`` file in *directory* as a list of JSON objects.

    Each file holds one or more JSON objects separated by a blank line. The
    text is wrapped in ``[...]`` and the blank-line separators are turned
    into commas so the whole file parses as a single JSON array.

    Files are read as UTF-8 (matching the other readers in this notebook)
    and visited in sorted name order, so the product numbering derived from
    the returned list is the same on every run and platform.

    Args:
        directory: Folder containing the product ``.txt`` files.

    Returns:
        A list with one entry per successfully parsed file; each entry is
        the list of JSON objects found in that file. Files that fail to
        parse are reported on stdout and skipped.
    """
    products_data = []
    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            data = file.read()
        corrected_data = '[' + data.replace('}\n\n{', '},\n{') + ']'
        try:
            product_info = json.loads(corrected_data)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from {filename}: {e}")
        else:
            products_data.append(product_info)
    return products_data

def extract_description(description_text):
    """Pull the text that follows "Product Description:" out of a raw string.

    The description runs from the first "Product Description:" marker up to
    the next "<br/>" (or to the end of the string when there is none) and is
    returned stripped. Returns "Description not found." when the marker is
    absent.
    """
    _, marker, remainder = description_text.partition("Product Description:")
    if not marker:
        return "Description not found."

    description, _, _ = remainder.partition("<br/>")
    return description.strip()

def write_product_info(products_data, output_file):
    """Write one summary line per product to *output_file*.

    Each product is a list of dict segments that are merged into a single
    dict. Missing fields are written as "N/A". The file is written as UTF-8
    because the notebook reads it back with that encoding.

    The line format (field names and their order, and the absence of a
    Description field) must stay in sync with the regular expression in
    parse_line, which relies on the exact ", <Field> =" separators.

    Args:
        products_data: List of products as returned by read_product_files.
        output_file: Path of the text file to create or overwrite.
    """
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for i, product in enumerate(products_data, start=1):
            product_dict = {}
            for segment in product:
                product_dict.update(segment)

            product_name = product_dict.get("Product Name", "N/A")
            category = product_dict.get("Category", "N/A")
            # A JSON null category would otherwise crash on .replace().
            category_path = "N/A" if category is None else str(category).replace('"', '')
            brand_name = product_dict.get("Brand Name", "N/A")
            seller_name = product_dict.get("Seller Name", "N/A")
            url = product_dict.get("URL", "N/A")
            # `or` also covers keys that are present but null in the JSON.
            price_info = product_dict.get("Price Info") or []
            # Positions 1 and 2 of each price entry hold the original and
            # the discounted price respectively.
            price_details = " | ".join(
                f"Original: {p[1]}, Discounted: {p[2]}" for p in price_info
            )
            additional_info = product_dict.get("Additional Info") or {}
            positive_ratings = additional_info.get("Positive Seller Ratings", "N/A")
            ship_on_time = additional_info.get("Ship on Time", "N/A")
            return_policy = product_dict.get("Return Policy") or {}
            return_details = f"{return_policy.get('Title', 'N/A')} ({return_policy.get('Subtitle', 'N/A')})"

            product_entry = f"Product {i:02d}: Product Name = {product_name}, Product Category = {category_path}, Brand Name = {brand_name}, Seller Name = {seller_name}, URL = {url}, Price Details = {price_details}, Positive Seller Ratings = {positive_ratings}, Ship on Time = {ship_on_time}, Return Policy = {return_details}\n"
            outfile.write(product_entry)

# Compiled once at import time instead of on every parse_line call.
# Each alternative captures one "<Field> = value" pair; the look-ahead stops
# the lazy match at the separator that introduces the next field.
_PRODUCT_FIELD_PATTERN = re.compile(
    r"Product Name = (?P<Product_Name>.*?)(?=, Product Category =)|"
    r"Product Category = (?P<Product_Category>.*?)(?=, Brand Name =)|"
    r"Brand Name = (?P<Brand_Name>.*?)(?=, Seller Name =)|"
    r"Seller Name = (?P<Seller_Name>.*?)(?=, URL =)|"
    r"Price Details = (?P<Price_Details>.*?)(?=, Positive Seller Ratings =)|"
    r"Positive Seller Ratings = (?P<Positive_Seller_Ratings>.*?)(?=, Ship on Time =)|"
    r"Ship on Time = (?P<Ship_on_Time>.*?)(?=, Return Policy =)|"
    r"Return Policy = (?P<Return_Policy>.*?)(?=, Product \d+:|, URL =|$)"
)
_PRODUCT_NUMBER_PATTERN = re.compile(r"Product (\d+):")


def parse_line(line):
    """Split one "Product NN: ..." line into the CSV row for that product.

    Args:
        line: A line in the format produced by write_product_info.

    Returns:
        A list of nine strings: "Product NN", product name, category, brand,
        seller, price details, positive seller ratings, ship-on-time and
        return policy. Fields that are not found are empty strings. The URL
        is deliberately not part of the row.

    Raises:
        ValueError: If the line does not start with "Product <digits>:".
    """
    number_match = _PRODUCT_NUMBER_PATTERN.match(line)
    if number_match is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise ValueError(f"Line does not start with 'Product <number>:': {line!r}")
    product_number = number_match.group(1)

    # Every match fills exactly one named group; the others are None.
    data = {
        k: v
        for m in _PRODUCT_FIELD_PATTERN.finditer(line)
        for k, v in m.groupdict().items()
        if v is not None
    }

    return [
        "Product " + product_number,
        data.get("Product_Name", ""),
        data.get("Product_Category", ""),
        data.get("Brand_Name", ""),
        data.get("Seller_Name", ""),
        data.get("Price_Details", ""),
        data.get("Positive_Seller_Ratings", ""),
        data.get("Ship_on_Time", ""),
        data.get("Return_Policy", "")
    ]

def extract_info_simple(query):
    """Extract the filter phrases ("limitations") mentioned in a query.

    Reads seller names from the module-level ``products_df`` DataFrame.

    Args:
        query: The user's question.

    Returns:
        A list of strings, in this order: price / percentage phrases,
        rating phrases, "top-rated sellers", "ship on time", and one
        "sold by <seller>" entry per known seller named in the query.
        A bare percentage such as "90%" can appear twice because both the
        price and the rating pattern accept it.
    """
    query_lower = query.lower()

    # Missing sellers are NaN in products_df; testing `nan in "text"` raises
    # TypeError, so drop them before the substring test below.
    seller_names = (
        products_df['Seller Name'].dropna().astype(str).str.lower().unique().tolist()
    )

    price_pattern = r"Rs\.\s*\d+|\d+\s*%|between\s*Rs\.\s*\d+\s*and\s*Rs\.\s*\d+"
    rating_pattern = r"more than \d{1,3}% positive ratings|less than \d{1,3}% positive ratings|\d{1,3}% positive ratings|\d{1,3}%"
    time_pattern = r"ship on time"

    # Find limitations
    limitations = re.findall(price_pattern, query)
    limitations.extend(re.findall(rating_pattern, query))
    if "top-rated sellers" in query_lower or "highly rated sellers" in query_lower:
        limitations.append("top-rated sellers")
    if re.search(time_pattern, query, re.IGNORECASE):
        limitations.append("ship on time")

    for seller in seller_names:
        # An empty seller name is a substring of every query; skip it.
        if seller.strip() and seller in query_lower:
            limitations.append(f"sold by {seller}")

    return limitations

def _lowest_discounted_price(price_details):
    """Return the smallest "Discounted: Rs. N" value in a cell, or NaN if none."""
    prices = re.findall(r'Discounted: Rs\. (\d+)', str(price_details))
    return min(map(int, prices)) if prices else float('nan')


def _percent_to_number(value):
    """Convert a cell such as "82%" (or an already numeric 82) to a number, NaN if unparsable."""
    text = str(value).strip().rstrip('%').strip()
    try:
        number = float(text)
    except ValueError:
        return float('nan')
    return int(number) if number.is_integer() else number


def load_data(filepath):
    """Load the product data from a CSV file and preprocess it.

    Adds a 'Discounted Price' column holding the lowest discounted price
    listed in 'Price Details', and converts 'Positive Seller Ratings' and
    'Ship on Time' from percentage strings to numbers.

    Rows whose cells cannot be parsed get NaN instead of aborting the whole
    load (previously a row without a discounted price, or a non-string
    cell, raised). When every row parses, the columns are integer-typed as
    before.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The preprocessed DataFrame.
    """
    data = pd.read_csv(filepath)
    data['Discounted Price'] = data['Price Details'].apply(_lowest_discounted_price)
    data['Positive Seller Ratings'] = data['Positive Seller Ratings'].apply(_percent_to_number)
    data['Ship on Time'] = data['Ship on Time'].apply(_percent_to_number)
    return data

_PRICE_RANGE_PATTERN = re.compile(r'between\s*Rs\.\s*(\d+)\s*and\s*Rs\.\s*(\d+)')
# The seller name ends at the "', '" that separates joined limitations, or at
# an optional closing quote at the end of the string.
_SELLER_PATTERN = re.compile(r"sold by (.+?)(?:', '|'?$)")


def parse_limitation(limitation):
    """Parse the limitation string into a structured dictionary.

    The string may be a single phrase ("sold by Acme") or several phrases
    quoted and joined with "', '" (as built by the notebook from the output
    of extract_info_simple). Only the first applicable rule, in the order
    below, produces the result:

    * "between Rs. A and Rs. B" -> ``{'price_range': (low, high)}``
    * "Rs. N"                   -> ``{'price_exact': N}``
    * "sold by X"               -> ``{'seller_name': X}``
    * "top-rated sellers"       -> ``{'top_rated_sellers': 90}``
    * "N%"                      -> ``{'top_rated_sellers': N}``
    * "ship on time"            -> ``{'ship_on_time': 100}``

    Numbers are taken from the phrase they belong to, so additional numbers
    elsewhere in a joined string no longer break the price range, and the
    quote characters added by the join are not included in the seller name.
    A rule whose phrase cannot be parsed is skipped instead of raising.

    Returns:
        A one-entry dict as described above, or None when nothing applies.
    """
    if 'between Rs.' in limitation:
        range_match = _PRICE_RANGE_PATTERN.search(limitation)
        # Fall back to the first two numbers for loosely written ranges.
        bounds = range_match.groups() if range_match else re.findall(r'\d+', limitation)[:2]
        if len(bounds) == 2:
            low, high = sorted(map(int, bounds))
            return {'price_range': (low, high)}

    if 'Rs.' in limitation:
        price_match = re.search(r'Rs\.\s*(\d+)', limitation) or re.search(r'(\d+)', limitation)
        if price_match:
            return {'price_exact': int(price_match.group(1))}

    if 'sold by' in limitation:
        seller_match = _SELLER_PATTERN.search(limitation)
        if seller_match and seller_match.group(1).strip():
            return {'seller_name': seller_match.group(1).strip()}

    if 'top-rated sellers' in limitation:
        return {'top_rated_sellers': 90}

    if '%' in limitation:
        rating_match = re.search(r'(\d+)\s*%', limitation) or re.search(r'(\d+)', limitation)
        if rating_match:
            return {'top_rated_sellers': int(rating_match.group(1))}

    if 'ship on time' in limitation:
        return {'ship_on_time': 100}

    return None

def filter_productsTwo(data, limitation_dict):
    """Apply filters to the data based on parsed limitations.

    Args:
        data: DataFrame with the columns 'Product Number', 'Discounted
            Price', 'Seller Name', 'Positive Seller Ratings' and
            'Ship on Time'.
        limitation_dict: One-entry dict as returned by parse_limitation, or
            None. Only the first entry is used.

    Returns:
        The 'Product Number' values of the matching rows. An empty list is
        returned when *limitation_dict* is None or empty, or when its key
        is not a known filter (previously an unknown key raised
        UnboundLocalError).

    Filters:
        price_exact        discounted price equals the value
        price_range        discounted price within (low, high), inclusive
        seller_name        seller name contains the value, case-insensitive
        top_rated_sellers  positive seller rating is at least the value
        ship_on_time       ship-on-time percentage equals the value
    """
    if not limitation_dict:
        return []

    key, value = next(iter(limitation_dict.items()))
    if key == 'price_exact':
        mask = data['Discounted Price'] == value
    elif key == 'price_range':
        mask = (data['Discounted Price'] >= value[0]) & (data['Discounted Price'] <= value[1])
    elif key == 'seller_name':
        # regex=False: seller names are literal text and may contain
        # characters such as "(", "." or "+" that are special in a regex.
        mask = data['Seller Name'].str.contains(value, case=False, na=False, regex=False)
    elif key == 'top_rated_sellers':
        mask = data['Positive Seller Ratings'] >= value
    elif key == 'ship_on_time':
        mask = data['Ship on Time'] == value
    else:
        return []

    return data.loc[mask, 'Product Number'].tolist()

def filter_products(input_filename, output_filename, matching_product_numbers):
    """Copy the product lines whose "Product NN" prefix is in a given set.

    Both files are handled as UTF-8, the encoding the rest of the notebook
    uses when it reads these product lists.

    Args:
        input_filename: Text file with one "Product NN: ..." line per product.
        output_filename: File to create or overwrite with the selected lines.
        matching_product_numbers: Iterable of prefixes such as "Product 04".
            When it is empty (or None) every line is copied unchanged, so
            "no filter" and "filter matched nothing" both yield the full list.
    """
    with open(input_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    if matching_product_numbers:
        product_set = set(matching_product_numbers)
        # The text before the first ':' is the product number of the line.
        matching_lines = [
            line for line in lines
            if line.split(':', 1)[0].strip() in product_set
        ]
    else:
        matching_lines = lines

    with open(output_filename, 'w', encoding='utf-8') as file:
        file.writelines(matching_lines)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Defining Directories Based on Query Type
</div>

In [10]:
# Build the text corpus for the selected search type and expose its path as
# `file_name`, which the retriever cells below read.
if searchType.lower() == "main":
    folder_path = '/Users/moiz/Moiz/Github/ITA-Project/Data/DarazDataMain'
    output_file = 'DarazDataMain.txt'
    process_files(folder_path, output_file)
    file_name = "DarazDataMain.txt"
elif searchType.lower() == "seller":
    folder_path = '/Users/moiz/Moiz/Github/ITA-Project/Data/DarazDataSeller'
    output_file = 'DarazDataSeller.txt'
    process_files(folder_path, output_file)
    file_name = "DarazDataSeller.txt"
elif searchType.lower() == "product":
    # 1. Pick the product category named in the query; it selects the folder.
    result = find_subject_in_query(query, subjects)
    if result == "No subject found":
        # Fail here with a clear message instead of a FileNotFoundError on a
        # folder called "No-subject-found".
        raise ValueError(f"No known product category found in query: {query!r}")
    words = result.split()
    result = '-'.join(words) if len(words) > 1 else result
    directory_path = 'Data/Products/' + str(result)
    print(directory_path)

    # 2. Flatten the product files into one "Product NN: ..." line each.
    products_data = read_product_files(directory_path)
    output_file = 'FinalProductsList.txt'
    write_product_info(products_data, output_file)

    # 3. Convert the flat text file into a CSV with the `headers` columns.
    input_file_path = 'FinalProductsList.txt'
    output_csv_path = 'FinalProductsList.csv'
    with open(input_file_path, 'r', encoding='utf-8') as file, \
        open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)

        for line in file:
            if line.strip():
                row = parse_line(line)
                writer.writerow(row)

    # products_df is read by extract_info_simple (seller names).
    products_df = pd.read_csv('FinalProductsList.csv')

    products_df.replace('N/A', np.nan, inplace=True)
    numeric_cols = products_df.select_dtypes(include=[np.number]).columns
    products_df[numeric_cols] = products_df[numeric_cols].fillna(products_df[numeric_cols].mean())

    def load_dataTwo(filepath, output_filepath):
        """Read a CSV, impute missing values and save the result.

        Numeric columns (float64 / int64) are filled with the column mean,
        all other columns with the most frequent value. A column that has
        no non-missing value at all is left untouched.

        Args:
            filepath: CSV file to read.
            output_filepath: Path the imputed CSV is written to (no index).

        Returns:
            The imputed DataFrame.
        """
        data = pd.read_csv(filepath)

        for column in data.columns:
            if data[column].dtype == 'float64' or data[column].dtype == 'int64':
                fill_value = data[column].mean()
            else:
                modes = data[column].mode()
                if modes.empty:
                    # Entirely missing column: nothing to impute from.
                    continue
                fill_value = modes.iloc[0]
            # Assign the result back: `data[column].fillna(..., inplace=True)`
            # acts on a temporary under pandas Copy-on-Write and would leave
            # `data` unchanged.
            data[column] = data[column].fillna(fill_value)

        data.to_csv(output_filepath, index=False)

        return data

    output_file_path = 'ProcessedFinalProductsList.csv'

    data = load_dataTwo('FinalProductsList.csv', output_file_path)

    # 4. Turn the query into one structured filter and apply it.
    extracted_info = extract_info_simple(query)

    # All extracted phrases are quoted and joined into a single string;
    # parse_limitation picks the first rule that applies to it.
    limitation = "', '".join(extracted_info)
    limitation = f"'{limitation}'"

    data = load_data('ProcessedFinalProductsList.csv')

    parsed_limitation = parse_limitation(limitation)

    matching_product_numbers = filter_productsTwo(data, parsed_limitation)

    # An empty match list makes filter_products copy every product line.
    filter_products('FinalProductsList.txt', 'ProductsList.txt', matching_product_numbers)

    file_name = "ProductsList.txt"
else:
    # Without this, an unknown search type only fails later with a NameError
    # on `file_name`.
    raise ValueError(
        f"Unknown searchType {searchType!r}; expected 'Main', 'Seller' or 'Product'"
    )

Data/Products/Watches


# Retriever Phase

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Creating Sentence Embeddings
</div>

In [11]:
# Every line of the corpus file is treated as one searchable entry.
with open(file_name, 'r', encoding="utf8") as f:
    products = f.readlines()

model = SentenceTransformer('all-MiniLM-L6-v2')
product_embeddings = model.encode(products, convert_to_tensor=True)

def search_products(query, k):
    """Print the *k* corpus lines most similar to *query*.

    Similarity is the cosine score between the sentence embedding of the
    query and the pre-computed embeddings of `products`.

    Args:
        query: Text to search for.
        k: Number of results wanted. It is capped at the number of
            available lines, because torch.topk raises when asked for more
            elements than exist (e.g. after a filter left a single product).
    """
    k = min(k, len(products))
    if k <= 0:
        print("Query:", query)
        print("No entries available to search.")
        return

    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]
    top_results = torch.topk(cosine_scores, k=k)

    print("Query:", query)
    # top_results[0] holds the scores, top_results[1] the matching indices.
    for score, idx in zip(top_results[0], top_results[1]):
        print("\nScore:", score.item())
        print("Product Details:", products[idx])

search_products(query, 2)

Query: Can you list watches between Rs. 1000 and Rs. 2000?

Score: 0.5335537791252136
Product Details: Product 04: Product Name = Men Quartz Watch for Waterproof Vacuum Plating Strap with Car Wheel Rim Hub Design Sport Automatic Wristwatch (Does not rotate), Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = D&K Online, URL = https://www.daraz.pk/products/-i411043073-s1964553711.html?search=1, Price Details = Original: Rs. 3999, Discounted: Rs. 1768 | Original: Rs. 3999, Discounted: Rs. 1869 | Original: Rs. 3999, Discounted: Rs. 1769 | Original: Rs. 3999, Discounted: Rs. 1768 | Original: Rs. 3999, Discounted: Rs. 1868, Positive Seller Ratings = 82%, Ship on Time = 100%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


Score: 0.4519554078578949
Product Details: Product 10: Product Name = 2022 trend full-automatic quartz movement men's watch wheel non mechanical watch wrist watch fashion men's watch, Pr

In [12]:
# Load the corpus file as LangChain documents. The encoding is stated
# explicitly so it matches the UTF-8 read of the same file above instead of
# depending on the platform default.
loader = TextLoader(file_name, encoding="utf-8")
docs = loader.load()

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Initializing Vector DB
</div>

In [13]:
import os
import re
import shutil

def manage_chroma_folders(directory):
    """Pick the next Chroma counter and remove old Chroma folders.

    Scans the sub-directories of *directory* once. The highest N among
    folders named exactly ``chromaDB<N>`` determines the next counter
    (N + 1, or 1 when there is none). Afterwards every sub-directory whose
    name starts with "chroma" is deleted recursively.

    NOTE(review): the deletion test (name starts with "chroma") is broader
    than the ``chromaDB<N>`` pattern used for counting - confirm that no
    unrelated folder in *directory* begins with "chroma".

    Args:
        directory: Folder that holds the ``chromaDB<N>`` directories.

    Returns:
        The new counter. It is also stored in the global ``chromaCounter``,
        which the rest of the notebook reads.
    """
    global chromaCounter
    chromaCounter = 0

    pattern = re.compile(r'^chromaDB(\d+)$')
    highest_number = 0
    folders_to_remove = []

    for item in os.listdir(directory):
        full_path = os.path.join(directory, item)
        if not os.path.isdir(full_path):
            continue
        match = pattern.match(item)
        if match:
            highest_number = max(highest_number, int(match.group(1)))
        if item.startswith("chroma"):
            folders_to_remove.append(full_path)

    chromaCounter = highest_number + 1

    # Delete only after the scan so the counter reflects every old folder.
    for full_path in folders_to_remove:
        shutil.rmtree(full_path)

    return chromaCounter

# Project folder that is scanned for existing chromaDB<N> directories.
directory = "/Users/moiz/Moiz/Github/ITA-Project"
# Sets the global chromaCounter and deletes the old "chroma*" directories.
manage_chroma_folders(directory)

# Record the counter chosen for this run in chromaCounter.txt
# (path is relative to the current working directory).
with open('chromaCounter.txt', 'w') as file:
    file.write(f"Current Chroma Counter = {chromaCounter}")

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Chunker 01 = Character Text Splitter
</div>

In [14]:
chunker01 = "Fixed-size (in characters) Overlapping Sliding Window"

# Split on newlines into chunks of at most chunkSize characters, 20 overlapping.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=chunkSize, chunk_overlap=20)
splits = text_splitter.split_documents(docs)

persist_directory = 'chromaDB' + str(chromaCounter) + '/'
# All chunkers persist into the same directory. A dedicated collection name
# keeps this chunker's vectors apart from the others; with the shared default
# collection, later similarity searches would also return chunks produced by
# a different splitter.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=GPT4AllEmbeddings(),
    collection_name="chunker01",
    persist_directory=persist_directory
)
vectordb.persist()
# Two best (document, score) pairs for the query.
docs01 = vectordb.similarity_search_with_score(query, k=2)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Chunker 02 = Recursive Character Text Splitter
</div>

In [15]:
chunker02 = "Recursive Structure Aware Splitting"

# text_splitter = RecursiveCharacterTextSplitter(separators = ["\n\n", "\n"], chunk_size=chunkSize, chunk_overlap=20)
# Try paragraph, line, word and finally character boundaries, in that order.
text_splitter = RecursiveCharacterTextSplitter(separators = ["\n\n", "\n", " ", ""], chunk_size=chunkSize, chunk_overlap=20)
splits = text_splitter.split_documents(docs)

persist_directory = 'chromaDB' + str(chromaCounter) + '/'
# All chunkers persist into the same directory. A dedicated collection name
# keeps this chunker's vectors apart from the others; with the shared default
# collection, this search would also return chunks stored by the previous
# chunker.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=GPT4AllEmbeddings(),
    collection_name="chunker02",
    persist_directory=persist_directory
)
vectordb.persist()
# Two best (document, score) pairs for the query.
docs02 = vectordb.similarity_search_with_score(query, k=2)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Chunker 03 = NLTK Text Splitter
</div>

In [16]:
# The NLTK splitter is only used for the "Main" and "Seller" corpora;
# docs03 is therefore undefined in "Product" mode.
if searchType.lower() != "product":
    chunker03 = "NLP Chunking: Tracking Topic Changes"

    text_splitter = NLTKTextSplitter(chunk_size=chunkSize)#, separator="\n")
    splits = text_splitter.split_documents(docs)

    persist_directory = 'chromaDB' + str(chromaCounter) + '/'
    # All chunkers persist into the same directory. A dedicated collection
    # name keeps this chunker's vectors apart from the others; with the
    # shared default collection, this search would also return chunks stored
    # by the previous chunkers.
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=GPT4AllEmbeddings(),
        collection_name="chunker03",
        persist_directory=persist_directory
    )
    vectordb.persist()
    # Two best (document, score) pairs for the query.
    docs03 = vectordb.similarity_search_with_score(query, k=2)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Combining the results into one collective file, then cleaning and removing duplicates
</div>

In [17]:
# "Product" mode: collect the chunks retrieved by chunkers 01 and 02 in
# output.txt, then write a duplicate-free copy to outputCleaned.txt.
if searchType.lower() == "product":
    print("question = \"" + query + "\"")

    # UTF-8 is stated explicitly: the product text is read as UTF-8 and may
    # contain characters the platform default encoding cannot represent.
    with open("output.txt", "w", encoding="utf-8") as file:
        # Each result is a (document, score) pair; only the text is kept.
        for result in (*docs01, *docs02):
            file.write(result[0].page_content + "\n")

    def remove_duplicates(input_file, output_file):
        """Copy *input_file* to *output_file*, keeping the first copy of each line.

        Line order is preserved. File errors are reported on stdout rather
        than raised.
        """
        try:
            with open(input_file, 'r', encoding='utf-8') as file:
                lines = file.readlines()

            # dict keys are unique and keep insertion order.
            unique_lines = list(dict.fromkeys(lines))

            with open(output_file, 'w', encoding='utf-8') as file:
                file.writelines(unique_lines)

        except (OSError, UnicodeError) as e:
            print(f"An error occurred: {e}")

    remove_duplicates('output.txt', 'outputCleaned.txt')



question = "Can you list watches between Rs. 1000 and Rs. 2000?"


In [18]:
# "Main" / "Seller" mode: collect the chunks retrieved by all three chunkers
# in output.txt, clean the combined text and store it in outputCleaned.txt.
if searchType.lower() == "main" or searchType.lower() == "seller":
    print("question = \"" + query + "\"")

    # Files are written as UTF-8 because the summary step reads
    # outputCleaned.txt with that encoding.
    with open("output.txt", "w", encoding="utf-8") as file:
        # Each result is a (document, score) pair; numbering restarts at 1
        # for every chunker.
        for results in (docs01, docs02, docs03):
            for count, result in enumerate(results, start=1):
                file.write(f"response{count} = \"{result[0].page_content}\"\n")

    def clean_text(text):
        """Normalise the combined retrieval output into plain prose.

        Removes image file names, collapses whitespace, repairs a few known
        fused words, strips the ``responseN = "..."`` wrappers (including
        their closing quote), removes list numbering such as "1." / "2-",
        drops repeated sentences and wraps the result at 100 columns.

        Args:
            text: Raw content of output.txt.

        Returns:
            The cleaned, wrapped text, always ending with a full stop.
        """
        text = re.sub(r"\S+\.(png|jpg|jpeg|gif)\s*", "", text)
        text = re.sub(r"\s+", " ", text).strip()
        text = re.sub(r"isnot", "is not", text)
        text = re.sub(r"orBrand", "or Brand", text)
        text = re.sub(r"ourWarranty", "our Warranty", text)
        # Drop the closing quote of each wrapper first: the quote directly
        # before the next `responseN = "` and the one at the very end.
        # Otherwise stray '"' characters stay in the cleaned text.
        text = re.sub(r"\"\s*(?=response\d+\s*=\s*\")", " ", text)
        text = re.sub(r"\"$", "", text)
        text = re.sub(r"response\d+\s*=\s*\"", "", text)
        text = re.sub(r"\d+\.", "", text)
        text = re.sub(r"\d+-", "", text)

        # Deduplicate text
        lines = text.split('.')
        seen = set()
        unique_lines = []
        for line in lines:
            line_clean = line.strip()
            if line_clean not in seen:
                seen.add(line_clean)
                unique_lines.append(line_clean)

        # Reconstruct text with clean lines
        cleaned_text = '. '.join(unique_lines).strip()
        if not cleaned_text.endswith('.'):
            cleaned_text += '.'

        # Split into multiple lines with a reasonable width
        wrapped_text = textwrap.fill(cleaned_text, width=100)

        return wrapped_text

    with open('output.txt', 'r', encoding='utf-8') as file:
        raw_text = file.read()

    cleaned_text = clean_text(raw_text)

    with open('outputCleaned.txt', 'w', encoding='utf-8') as file:
        file.write(cleaned_text)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Applying Extractive Summary
</div>

In [19]:
# The extractive summary is only produced for the "Main" and "Seller" corpora.
if searchType.lower() != "product":
    from sumy.parsers.plaintext import PlaintextParser
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.summarizers.lsa import LsaSummarizer

    def summarize_text(file_path, language="english", summary_length=10,
                       output_path="outputCleanedSummarised.txt"):
        """Write an LSA extractive summary of a text file.

        Args:
            file_path: UTF-8 text file to summarise.
            language: Language name passed to the sumy tokenizer.
            summary_length: Sentence count passed to the summarizer.
            output_path: File the summary is written to; defaults to the
                name that was previously hard-coded.

        Returns:
            The summary text (the selected sentences joined by spaces).
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
        parser = PlaintextParser.from_string(text, Tokenizer(language))

        summarizer = LsaSummarizer()

        summary = summarizer(parser.document, summary_length)

        summarized_text = ' '.join(str(sentence) for sentence in summary)

        with open(output_path, 'w', encoding='utf-8') as file:
            file.write(summarized_text)

        return summarized_text

    file_path = 'outputCleaned.txt'
    summarize_text(file_path)

<div class="alert alert-block alert-info">
<!-- <b>Tip:</b> Use blue boxes (alert-info) for tips and notes.  -->
Removing Intermediate Files
</div>

In [20]:
# Delete the intermediate files of this run, skipping any that do not exist.
_intermediate_files = (
    "/Users/moiz/Moiz/Github/ITA-Project/DarazDataMain.txt",
    "/Users/moiz/Moiz/Github/ITA-Project/DarazDataSeller.txt",
    "/Users/moiz/Moiz/Github/ITA-Project/output.txt",
    "/Users/moiz/Moiz/Github/ITA-Project/FinalProductsList.csv",
    "/Users/moiz/Moiz/Github/ITA-Project/ProcessedFinalProductsList.csv",
    "/Users/moiz/Moiz/Github/ITA-Project/FinalProductsList.txt",
    "/Users/moiz/Moiz/Github/ITA-Project/ProductsList.txt",
)
for _intermediate_path in _intermediate_files:
    if os.path.exists(_intermediate_path):
        os.remove(_intermediate_path)