# Attachment generator

In [None]:
import os

def create_exact_size_file(file_path, size_mb):
    # Calculate the exact size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    print(f'Generated Excel file at {directory_path + excel_filename} with size exactly {target_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size exactly {target_size_mb} MB')

# Set the directory where files will be saved
directory_path = "/Users/ikawahyuni/dataquality/dummy_files"

# Set filenames and target size
generate_files(directory_path, 'dummy_15mb.xlsx', 'dummy_15mb.pdf', 15)


# 14 MB

In [1]:
import os

def create_exact_size_file(file_path, size_mb):
    # Calculate the exact size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    print(f'Generated Excel file at {directory_path + excel_filename} with size exactly {target_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size exactly {target_size_mb} MB')

# Set the directory where files will be saved
directory_path = "/Users/ikawahyuni/dataquality/dummy_files"

# Set filenames and target size (change from 15 to 14)
generate_files(directory_path, 'dummy_14mb.xlsx', 'dummy_14mb.pdf', 14)


Generated Excel file at /Users/ikawahyuni/dataquality/dummy_files/dummy_14mb.xlsx with size exactly 14 MB
Generated PDF file at /Users/ikawahyuni/dataquality/dummy_files/dummy_14mb.pdf with size exactly 14 MB


# maximum size limit of 10,485,760 bytes (10 MB)

In [10]:
import os

def create_exact_size_file(file_path, size_mb):
    # Maximum size in bytes (10 MB)
    max_size_bytes = 9485760

    # Calculate the desired size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Adjust size if it exceeds the maximum allowed
    if size_bytes > max_size_bytes:
        size_bytes = max_size_bytes

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    actual_size_mb = min(target_size_mb, 10)  # Limit the reported size to 10 MB max
    print(f'Generated Excel file at {directory_path + excel_filename} with size up to {actual_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size up to {actual_size_mb} MB')

# Set the directory where files will be saved
directory_path = "D:\dataquality\dummy_files"

# Set filenames and target size (e.g., 12 MB would be reduced to 10 MB)
generate_files(directory_path, 'dummy.xlsx', 'dummy.pdf', 12)


Generated Excel file at D:\dataquality\dummy_files/dummy.xlsx with size up to 10 MB
Generated PDF file at D:\dataquality\dummy_files/dummy.pdf with size up to 10 MB


## Tool to collect all tickets 

In [1]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        extract_path = os.path.join(directory, filename[:-4]) 

   
        if filename.endswith(".zip"):
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        elif filename.endswith(".rar"):
            with rarfile.RarFile(filepath, 'r') as rar_ref:
                rar_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        else:
            continue
        
 
        report_file_path = find_report_file(extract_path)
        if report_file_path:
            df = create_dataframe_from_file(report_file_path, extract_path)
            csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
            df.to_csv(csv_path, index=False)
            print(f"Data from {report_file_path} saved to CSV at {csv_path}")
        else:
            print(f"No 'ReportTest.arx' found in {extract_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'ReportTest.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(extract_path, file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'FirstPublishLocationId': first_publish_location_ids
    })


directory_path = r'D:\Salesforce\archive\dataquality\Tool for attachments'
extract_and_process_files(directory_path)


Extracted ReportTest.zip to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest
Data from D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\ReportTest.arx saved to CSV at D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest_files_info.csv


## Tool to collect all tickets 2

In [7]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        # Extract the big zip file
        if filename.endswith(".zip"):
            extract_path = os.path.join(directory, filename[:-4])
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")

            # Convert specific file types to zip within the extracted folder
            convert_files_to_zip(extract_path)

            # Process the extracted folder to find 'ReportTest.arx' and create the DataFrame
            report_file_path = find_report_file(extract_path)
            if report_file_path:
                df = create_dataframe_from_file(report_file_path, extract_path)
                csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
                df.to_csv(csv_path, index=False)
                print(f"Data from {report_file_path} saved to CSV at {csv_path}")
            else:
                print(f"No 'ReportTest.arx' found in {extract_path}")

def convert_files_to_zip(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith((".txt", ".rtp", ".tif", ".rar",".rptdesign",".js")):
                file_path = os.path.join(root, file)
                new_zip_path = file_path[:-4] + ".zip"
                with zipfile.ZipFile(new_zip_path, 'w') as zip_ref:
                    zip_ref.write(file_path, file)
                os.remove(file_path)  # Optionally remove the original file
                print(f"Converted {file} to {new_zip_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'ReportTest.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    path_on_client = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            # Check if the file was converted to zip
            original_file_path = os.path.join(extract_path, file_name)
            zip_file_path = original_file_path[:-4] + ".zip"
            if os.path.exists(zip_file_path):
                full_path = zip_file_path
            else:
                full_path = original_file_path

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            path_on_client.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': path_on_client,
        'FirstPublishLocationId': first_publish_location_ids
    })

directory_path = r'D:\Salesforce\archive\dataquality\Tool for attachments'
extract_and_process_files(directory_path)


Extracted ReportTest.zip to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest
Converted .mongorc_0.js to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\.mongorc_.zip
Converted BRICareDetailReport_2_257.rptdesign to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\BRICareDetailReport_2_257.rptde.zip
Converted BRICareDetailReport_2_258.rptdesign to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\BRICareDetailReport_2_258.rptde.zip
Converted BRICareDetailReport_2_284.rptdesign to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\BRICareDetailReport_2_284.rptde.zip
Converted BRICareDetailReport_2_285.rptdesign to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\BRICareDetailReport_2_285.rptde.zip
Converted BRICareDetailReport_2_288.rptdesign to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\BRICareDetailReport_2_288.rptde.zip
Converted BRICareDetailReport_2_289.rptdesig

In [3]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        extract_path = os.path.join(directory, filename[:-4]) 

        if filename.endswith(".zip"):
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        elif filename.endswith(".rar"):
            with rarfile.RarFile(filepath, 'r') as rar_ref:
                rar_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        else:
            continue

        report_file_path = find_report_file(extract_path)
        if report_file_path:
            df = create_dataframe_from_file(report_file_path, extract_path)
            csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
            df.to_csv(csv_path, index=False)
            print(f"Data from {report_file_path} saved to CSV at {csv_path}")
        else:
            print(f"No 'ReportTest.arx' found in {extract_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'ReportTest.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(extract_path, file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': version_data,  # New column added here
        'FirstPublishLocationId': first_publish_location_ids
    })

directory_path = r'D:\Salesforce\archive\dataquality\Tool for attachments'
extract_and_process_files(directory_path)


Extracted ReportTest.zip to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest
Data from D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\ReportTest.arx saved to CSV at D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest_files_info.csv


In [6]:
import pandas as pd
import os

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(r'E:\Attachment\report1', file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': version_data,  # New column added here
        'FirstPublishLocationId': first_publish_location_ids
    })

# Assume this is the path to the report file you uploaded
report_file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.arx"
# Create dataframe
df = create_dataframe_from_file(report_file_path, 'E:\\Attachment\\report1')
# Save to CSV
csv_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.csv"
df.to_csv(csv_path, index=False)
print(f"Data from {report_file_path} saved to CSV at {csv_path}")


Data from C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.arx saved to CSV at C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.csv


In [11]:
## to create copied 1000 ine


import pandas as pd

path=r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv"
df=pd.read_csv(path)

df = pd.concat([df] * 1000, ignore_index=True)
df.to_csv(path, index=False)

## to change the column FirstPublishLocationId with all values in ID column taken from case ID in salesforce

In [14]:
import pandas as pd

# Load the data
df1 = pd.read_csv(r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv")  # Assuming this file contains the columns Title, Description, VersionData, PathOnClient, FirstPublishLocationId
df2 = pd.read_csv(r"C:\Users\maste\Downloads\bricare\extract_case_id_1000.csv")  # Assuming this file contains the column ID

# Check if both DataFrames have the same number of rows
if len(df1) != len(df2):
    raise ValueError("The number of rows in both files must be the same")

# Replace the 'FirstPublishLocationId' column in df1 with the 'ID' column from df2
df1['FirstPublishLocationId'] = df2['ID']

# Save the modified DataFrame to a new CSV file
# df1.to_csv('modified_file.csv', index=False)

print("The 'FirstPublishLocationId' column has been replaced with the 'ID' column values.")

df1.iloc[900:1000]


The 'FirstPublishLocationId' column has been replaced with the 'ID' column values.


Unnamed: 0,Title,Description,VersionData,PathOnClient,FirstPublishLocationId
900,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KLYAY
901,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KMYAY
902,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KNYAY
903,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KOYAY
904,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KPYAY
...,...,...,...,...,...
995,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LtYAI
996,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LuYAI
997,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LvYAI
998,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LwYAI


In [16]:
## Slice into 5 files

import pandas as pd

# Load the file
file_path = r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv" 
df = pd.read_csv(file_path)

# Number of rows per split file
rows_per_file = 100

# Create 5 files with 200 rows each
for i in range(5):
    start_row = i * rows_per_file
    end_row = (i + 1) * rows_per_file
    split_df = df.iloc[start_row:end_row]
    split_file_path = f'split_file_{i + 1}.csv'
    split_df.to_csv(split_file_path, index=False)
    print(f'File {split_file_path} created with rows from {start_row} to {end_row}')

print('Files created successfully.')


File split_file_1.csv created with rows from 0 to 100
File split_file_2.csv created with rows from 100 to 200
File split_file_3.csv created with rows from 200 to 300
File split_file_4.csv created with rows from 300 to 400
File split_file_5.csv created with rows from 400 to 500
Files created successfully.


# Change directory

In [7]:
import pandas as pd

# Load the CSV file
file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\report7.csv"
data = pd.read_csv(file_path)

# Replace 'report1' with 'Report2' in the 'VersionData' and 'PathOnClient' columns
data['VersionData'] = data['VersionData'].str.replace('report1', 'Report8')
data['PathOnClient'] = data['PathOnClient'].str.replace('report1', 'Report8')

# Save the modified DataFrame to a new CSV file
modified_file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Report8.csv"
data.to_csv(modified_file_path, index=False)

# Provide the path to the modified file
print(f'Modified file saved at: {modified_file_path}')


Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Report8.csv


In [1]:
import os
import pandas as pd

# Define the folder containing the CSV files
folder_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv"  

# Loop through each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Load the CSV file
        data = pd.read_csv(file_path)
        
        # Replace drive 'D' with 'E' in the 'VersionData' and 'PathOnClient' columns
        data['VersionData'] = data['VersionData'].str.replace('D:', 'E:')
        data['PathOnClient'] = data['PathOnClient'].str.replace('D:', 'E:')
        
        # Save the modified DataFrame to a new CSV file
        modified_file_path = os.path.join(folder_path, f'Modified_{filename}')
        data.to_csv(modified_file_path, index=False)

        print(f'Modified file saved at: {modified_file_path}')


Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report2.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report3.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report4.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report5.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report6.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report7.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report8.csv
