# Attachment generator

In [None]:
import os

def create_exact_size_file(file_path, size_mb):
    # Calculate the exact size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    print(f'Generated Excel file at {directory_path + excel_filename} with size exactly {target_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size exactly {target_size_mb} MB')

# Set the directory where files will be saved
directory_path = "/Users/ikawahyuni/dataquality/dummy_files"

# Set filenames and target size
generate_files(directory_path, 'dummy_15mb.xlsx', 'dummy_15mb.pdf', 15)


# 14 MB

In [1]:
import os

def create_exact_size_file(file_path, size_mb):
    # Calculate the exact size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    print(f'Generated Excel file at {directory_path + excel_filename} with size exactly {target_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size exactly {target_size_mb} MB')

# Set the directory where files will be saved
directory_path = "/Users/ikawahyuni/dataquality/dummy_files"

# Set filenames and target size (change from 15 to 14)
generate_files(directory_path, 'dummy_14mb.xlsx', 'dummy_14mb.pdf', 14)


Generated Excel file at /Users/ikawahyuni/dataquality/dummy_files/dummy_14mb.xlsx with size exactly 14 MB
Generated PDF file at /Users/ikawahyuni/dataquality/dummy_files/dummy_14mb.pdf with size exactly 14 MB


# maximum size limit of 10,485,760 bytes (10 MB)

In [10]:
import os

def create_exact_size_file(file_path, size_mb):
    # Maximum size in bytes (10 MB)
    max_size_bytes = 9485760

    # Calculate the desired size in bytes
    size_bytes = size_mb * 1024 * 1024

    # Adjust size if it exceeds the maximum allowed
    if size_bytes > max_size_bytes:
        size_bytes = max_size_bytes

    # Open the file in write binary ('wb') mode
    with open(file_path, 'wb') as file:
        file.seek(size_bytes - 1)  # Move to the position one byte before the desired size
        file.write(b'\0')  # Write a single zero byte at this position

def generate_files(directory_path, excel_filename, pdf_filename, target_size_mb):
    # Ensure the directory ends with a slash
    if not directory_path.endswith('/'):
        directory_path += '/'

    # Generate exact-sized Excel and PDF files
    create_exact_size_file(directory_path + excel_filename, target_size_mb)
    create_exact_size_file(directory_path + pdf_filename, target_size_mb)

    actual_size_mb = min(target_size_mb, 10)  # Limit the reported size to 10 MB max
    print(f'Generated Excel file at {directory_path + excel_filename} with size up to {actual_size_mb} MB')
    print(f'Generated PDF file at {directory_path + pdf_filename} with size up to {actual_size_mb} MB')

# Set the directory where files will be saved
directory_path = "D:\dataquality\dummy_files"

# Set filenames and target size (e.g., 12 MB would be reduced to 10 MB)
generate_files(directory_path, 'dummy.xlsx', 'dummy.pdf', 12)


Generated Excel file at D:\dataquality\dummy_files/dummy.xlsx with size up to 10 MB
Generated PDF file at D:\dataquality\dummy_files/dummy.pdf with size up to 10 MB


## Tool to collect all tickets 

In [1]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        extract_path = os.path.join(directory, filename[:-4]) 

   
        if filename.endswith(".zip"):
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        elif filename.endswith(".rar"):
            with rarfile.RarFile(filepath, 'r') as rar_ref:
                rar_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        else:
            continue
        
 
        report_file_path = find_report_file(extract_path)
        if report_file_path:
            df = create_dataframe_from_file(report_file_path, extract_path)
            csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
            df.to_csv(csv_path, index=False)
            print(f"Data from {report_file_path} saved to CSV at {csv_path}")
        else:
            print(f"No 'ReportTest.arx' found in {extract_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'ReportTest.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(extract_path, file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'FirstPublishLocationId': first_publish_location_ids
    })


directory_path = r'D:\Salesforce\archive\dataquality\Tool for attachments'
extract_and_process_files(directory_path)


Extracted ReportTest.zip to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest
Data from D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\ReportTest.arx saved to CSV at D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest_files_info.csv


## Tool to collect all tickets 2

In [18]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        # Extract the big zip file
        if filename.endswith(".zip"):
            extract_path = os.path.join(directory, filename[:-4])
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")

            # Convert specific file types to zip within the extracted folder
            convert_files_to_zip(extract_path)

            # Process the extracted folder to find 'ReportTest.arx' and create the DataFrame
            report_file_path = find_report_file(extract_path)
            if report_file_path:
                df = create_dataframe_from_file(report_file_path, extract_path)
                csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
                df.to_csv(csv_path, index=False)
                print(f"Data from {report_file_path} saved to CSV at {csv_path}")
            else:
                print(f"No 'ReportTest.arx' found in {extract_path}")

def convert_files_to_zip(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith((".txt", ".rtp", ".tif", ".rar",".rptdesign",".js",".jfif",".crdownload",".heic",".htm")):
                file_path = os.path.join(root, file)
                new_zip_path = file_path[:-4] + ".zip"
                with zipfile.ZipFile(new_zip_path, 'w') as zip_ref:
                    zip_ref.write(file_path, file)
                os.remove(file_path)  # Optionally remove the original file
                print(f"Converted {file} to {new_zip_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'Report.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    path_on_client = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            # Check if the file was converted to zip
            original_file_path = os.path.join(extract_path, file_name)
            zip_file_path = original_file_path[:-4] + ".zip"
            if os.path.exists(zip_file_path):
                full_path = zip_file_path
            else:
                full_path = original_file_path

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            path_on_client.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': path_on_client,
        'FirstPublishLocationId': first_publish_location_ids
    })

directory_path = r"D:\Python2\Attachment"
extract_and_process_files(directory_path)


Extracted Report(6).zip to D:\Python2\Attachment\Report(6)
Converted 20240727_231535_160.heic to D:\Python2\Attachment\Report(6)\Report(6)\20240727_231535_160..zip
Converted 6e929343-c3a9-4de4-b96f-ba7c8f45cf51_640.jfif to D:\Python2\Attachment\Report(6)\Report(6)\6e929343-c3a9-4de4-b96f-ba7c8f45cf51_640..zip
Converted a20983e5-4925-427f-a525-d2739548a59e+-+M.+Abdan+Alkhasbi_759.jfif to D:\Python2\Attachment\Report(6)\Report(6)\a20983e5-4925-427f-a525-d2739548a59e+-+M.+Abdan+Alkhasbi_759..zip
Converted ADI REMANTO_100724_897.htm to D:\Python2\Attachment\Report(6)\Report(6)\ADI REMANTO_100724_897.zip
Converted AGUSTINUS TANNOS_150724_665.htm to D:\Python2\Attachment\Report(6)\Report(6)\AGUSTINUS TANNOS_150724_665.zip
Converted ANDANG TJAHJANTOK_150724_885.htm to D:\Python2\Attachment\Report(6)\Report(6)\ANDANG TJAHJANTOK_150724_885.zip
Converted ANDI HARTONO_wa150724_491.htm to D:\Python2\Attachment\Report(6)\Report(6)\ANDI HARTONO_wa150724_491.zip
Converted cecilia perubahan no rek_975

In [3]:
import os
import zipfile
import rarfile  # pip install rarfile
import pandas as pd

def extract_and_process_files(directory):
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        extract_path = os.path.join(directory, filename[:-4]) 

        if filename.endswith(".zip"):
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        elif filename.endswith(".rar"):
            with rarfile.RarFile(filepath, 'r') as rar_ref:
                rar_ref.extractall(extract_path)
                print(f"Extracted {filename} to {extract_path}")
        else:
            continue

        report_file_path = find_report_file(extract_path)
        if report_file_path:
            df = create_dataframe_from_file(report_file_path, extract_path)
            csv_path = os.path.join(directory, filename[:-4] + '_files_info.csv')
            df.to_csv(csv_path, index=False)
            print(f"Data from {report_file_path} saved to CSV at {csv_path}")
        else:
            print(f"No 'ReportTest.arx' found in {extract_path}")

def find_report_file(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'ReportTest.arx':
                return os.path.join(root, file)
    return None

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(extract_path, file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': version_data,  # New column added here
        'FirstPublishLocationId': first_publish_location_ids
    })

directory_path = r'D:\Salesforce\archive\dataquality\Tool for attachments'
extract_and_process_files(directory_path)


Extracted ReportTest.zip to D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest
Data from D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest\ReportTest.arx saved to CSV at D:\Salesforce\archive\dataquality\Tool for attachments\ReportTest_files_info.csv


In [6]:
import pandas as pd
import os

def create_dataframe_from_file(file_path, extract_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    titles = []
    descriptions = []
    version_data = []
    first_publish_location_ids = []

    for line in lines:
        if line.startswith("DATA"):
            parts = line.strip().split('"')
            ticket_number = parts[1].strip()
            file_name = parts[3].strip().partition(' ')[2]  # Extracting everything after "1 "

            full_path = os.path.join(r'E:\Attachment\report1', file_name)

            titles.append(file_name)
            descriptions.append("")
            version_data.append(full_path)
            first_publish_location_ids.append(ticket_number)

    return pd.DataFrame({
        'Title': titles,
        'Description': descriptions,
        'VersionData': version_data,
        'PathOnClient': version_data,  # New column added here
        'FirstPublishLocationId': first_publish_location_ids
    })

# Assume this is the path to the report file you uploaded
report_file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.arx"
# Create dataframe
df = create_dataframe_from_file(report_file_path, 'E:\\Attachment\\report1')
# Save to CSV
csv_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.csv"
df.to_csv(csv_path, index=False)
print(f"Data from {report_file_path} saved to CSV at {csv_path}")


Data from C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.arx saved to CSV at C:\Users\maste\Downloads\dataloader_v60.0.2\server\attachment\Report.csv


In [11]:
## to create copied 1000 ine


import pandas as pd

path=r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv"
df=pd.read_csv(path)

df = pd.concat([df] * 1000, ignore_index=True)
df.to_csv(path, index=False)

## to change the column FirstPublishLocationId with all values in ID column taken from case ID in salesforce

In [14]:
import pandas as pd

# Load the data
df1 = pd.read_csv(r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv")  # Assuming this file contains the columns Title, Description, VersionData, PathOnClient, FirstPublishLocationId
df2 = pd.read_csv(r"C:\Users\maste\Downloads\bricare\extract_case_id_1000.csv")  # Assuming this file contains the column ID

# Check if both DataFrames have the same number of rows
if len(df1) != len(df2):
    raise ValueError("The number of rows in both files must be the same")

# Replace the 'FirstPublishLocationId' column in df1 with the 'ID' column from df2
df1['FirstPublishLocationId'] = df2['ID']

# Save the modified DataFrame to a new CSV file
# df1.to_csv('modified_file.csv', index=False)

print("The 'FirstPublishLocationId' column has been replaced with the 'ID' column values.")

df1.iloc[900:1000]


The 'FirstPublishLocationId' column has been replaced with the 'ID' column values.


Unnamed: 0,Title,Description,VersionData,PathOnClient,FirstPublishLocationId
900,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KLYAY
901,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KMYAY
902,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KNYAY
903,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KOYAY
904,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449KPYAY
...,...,...,...,...,...
995,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LtYAI
996,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LuYAI
997,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LvYAI
998,attachment,,C:\Users\lenovo\Downloads\dummy15mb.pdf,/Users/ikawahyuni/dataquality/large_dummy_file...,500MR00000449LwYAI


In [16]:
## Slice into 5 files

import pandas as pd

# Load the file
file_path = r"C:\Users\maste\Downloads\bricare\file_for_attachment.csv" 
df = pd.read_csv(file_path)

# Number of rows per split file
rows_per_file = 100

# Create 5 files with 200 rows each
for i in range(5):
    start_row = i * rows_per_file
    end_row = (i + 1) * rows_per_file
    split_df = df.iloc[start_row:end_row]
    split_file_path = f'split_file_{i + 1}.csv'
    split_df.to_csv(split_file_path, index=False)
    print(f'File {split_file_path} created with rows from {start_row} to {end_row}')

print('Files created successfully.')


File split_file_1.csv created with rows from 0 to 100
File split_file_2.csv created with rows from 100 to 200
File split_file_3.csv created with rows from 200 to 300
File split_file_4.csv created with rows from 300 to 400
File split_file_5.csv created with rows from 400 to 500
Files created successfully.


# Change directory

In [7]:
import pandas as pd

# Load the CSV file
file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\report7.csv"
data = pd.read_csv(file_path)

# Replace 'report1' with 'Report2' in the 'VersionData' and 'PathOnClient' columns
data['VersionData'] = data['VersionData'].str.replace('report1', 'Report8')
data['PathOnClient'] = data['PathOnClient'].str.replace('report1', 'Report8')

# Save the modified DataFrame to a new CSV file
modified_file_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Report8.csv"
data.to_csv(modified_file_path, index=False)

# Provide the path to the modified file
print(f'Modified file saved at: {modified_file_path}')


Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Report8.csv


In [1]:
import os
import pandas as pd

# Define the folder containing the CSV files
folder_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv"  

# Loop through each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Load the CSV file
        data = pd.read_csv(file_path)
        
        # Replace drive 'D' with 'E' in the 'VersionData' and 'PathOnClient' columns
        data['VersionData'] = data['VersionData'].str.replace('D:', 'E:')
        data['PathOnClient'] = data['PathOnClient'].str.replace('D:', 'E:')
        
        # Save the modified DataFrame to a new CSV file
        modified_file_path = os.path.join(folder_path, f'Modified_{filename}')
        data.to_csv(modified_file_path, index=False)

        print(f'Modified file saved at: {modified_file_path}')


Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report2.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report3.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report4.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report5.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report6.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report7.csv
Modified file saved at: C:\Users\maste\Downloads\dataloader_v60.0.2\server\Report csv\Report csv\Modified_Report8.csv


# Change the path

In [22]:
import pandas as pd
import os

# Load the CSV file
file_path = r"D:\Python2\Attachment\attachments_log\log_report8\error080524022045731.csv"
df = pd.read_csv(file_path)

# Define the new folder path
new_folder_path = "D:\Python2\Attachment\Report(7)\Report(7)"

# Update the VERSIONDATA and PATHONCLIENT columns
# df['VERSIONDATA'] = df['VERSIONDATA'].apply(lambda x: new_folder_path + x.split('Report(4)')[-1].replace('/', '\\'))
# df['PATHONCLIENT'] = df['PATHONCLIENT'].apply(lambda x: new_folder_path + x.split('Report(4)')[-1].replace('/', '\\'))

df['VersionData'] = df['VersionData'].apply(lambda x: os.path.join(new_folder_path, os.path.basename(x)).replace('/', '\\'))
df['PathOnClient'] = df['PathOnClient'].apply(lambda x: os.path.join(new_folder_path, os.path.basename(x)).replace('/', '\\'))


# Save the updated DataFrame to a new CSV file
output_path = r"D:\Python2\Attachment\attachments_log\log_report8\error080524022045731_done.csv"
df.to_csv(output_path, index=False)

# Provide the path to the updated file
output_path


'D:\\Python2\\Attachment\\attachments_log\\log_report8\\error080524022045731_done.csv'

# Converting into ZIP

In [23]:
import pandas as pd
import os
import zipfile

# Load the CSV file
file_path = r"D:\Python2\Attachment\attachments_log\log_report8\error081224021733461.csv"
df = pd.read_csv(file_path)

# Iterate through each row in the DataFrame
for index, row in df.iterrows():
    # Get the file title and the directory from VERSIONDATA
    title = row['TITLE']
    version_data_path = row['VERSIONDATA']
    
    # Ensure both TITLE and VERSIONDATA are not null
    if pd.notnull(title) and pd.notnull(version_data_path):
        # Get the directory path from VERSIONDATA
        dir_path = os.path.dirname(version_data_path)
        # Define the full file path
        full_file_path = os.path.join(dir_path, title)
        
        # Define the ZIP file path
        zip_file_path = full_file_path.replace(os.path.splitext(title)[1], '.zip')
        
        try:
            # Create the ZIP file in the same directory without deleting the original file
            with zipfile.ZipFile(zip_file_path, 'w') as zipf:
                zipf.write(full_file_path, title)
            print(f"Successfully zipped: {full_file_path}")
            
            # Update the VERSIONDATA and PATHONCLIENT columns with the new ZIP file path
            df.at[index, 'VERSIONDATA'] = zip_file_path
            df.at[index, 'PATHONCLIENT'] = zip_file_path
            
        except FileNotFoundError:
            print(f"File not found: {full_file_path}")

# Define the new file name by adding "_done" to the original file name
done_file_path = file_path.replace('.csv', '_done2.csv')

# Save the updated DataFrame to the new CSV file
df.to_csv(done_file_path, index=False)

# Output the path to the new CSV file
print(f"Updated CSV file saved as: {done_file_path}")



File not found: D:\Python2\Attachment\Report(7)\Report(7)\SITI BAINAH_080724_34.htm
Successfully zipped: D:\Python2\Attachment\Report(7)\Report(7)\ABSENSI MANUAL NINIK_127.odt
Successfully zipped: D:\Python2\Attachment\Report(7)\Report(7)\Invoice Blibli I Wayan_138.csv
File not found: D:\Python2\Attachment\Report(7)\Report(7)\54204961a.jpeg_168.crdownload
File not found: D:\Python2\Attachment\Report(7)\Report(7)\NENO RIANA_080724_378.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\LEDYANA CHRISTINE SUMITRO_170724_380.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\BBKHIT SULAWESI TENGGARA_0724_381.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\SUDIRMAN_080724_397.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\JUSRI_080724_398.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\SUSANTO_170724_399.htm
File not found: D:\Python2\Attachment\Report(7)\Report(7)\RUSWANTORO WIDJAJA_150724_400.htm
File not found: D:\Python2\At

# Converting into ZIP for files without extentions

In [26]:
import pandas as pd
import os
import zipfile
import shutil

# Load the CSV file
file_path = r"D:\Python2\Attachment\attachments_log\log_report7\error081224023217824.csv"
df = pd.read_csv(file_path)

# Iterate through each row in the DataFrame
for index, row in df.iterrows():
    title = row.get('TITLE', None)
    version_data_path = row.get('VERSIONDATA', None)
    
    if pd.notnull(title) and pd.notnull(version_data_path):
        dir_path = os.path.dirname(version_data_path)
        full_file_path = os.path.join(dir_path, title)
        
        # Check if the file exists
        if os.path.exists(full_file_path):
            # Handle files without an extension
            if not os.path.splitext(title)[1]:
                temp_file_path = full_file_path + '.tmp'
                shutil.copy(full_file_path, temp_file_path)
                
                # Zip the file with the temporary extension
                zip_file_path = temp_file_path.replace('.tmp', '.zip')
                
                try:
                    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
                        zipf.write(temp_file_path, os.path.basename(temp_file_path))
                    print(f"Successfully zipped: {temp_file_path}")
                    
                    # Update the DataFrame with the new ZIP file path
                    df.at[index, 'VERSIONDATA'] = zip_file_path
                    df.at[index, 'PATHONCLIENT'] = zip_file_path
                    
                    # Clean up the temporary file
                    os.remove(temp_file_path)
                    
                except Exception as e:
                    print(f"Error zipping file {temp_file_path}: {str(e)}")
            else:
                # If the file has an extension, handle it normally
                zip_file_path = full_file_path.replace(os.path.splitext(title)[1], '.zip')
                
                try:
                    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
                        zipf.write(full_file_path, title)
                    print(f"Successfully zipped: {full_file_path}")
                    
                    # Update the DataFrame with the new ZIP file path
                    df.at[index, 'VERSIONDATA'] = zip_file_path
                    df.at[index, 'PATHONCLIENT'] = zip_file_path
                    
                except Exception as e:
                    print(f"Error zipping file {full_file_path}: {str(e)}")
        else:
            print(f"File not found: {full_file_path}")

# Define the new file name by adding "_done" to the original file name
done_file_path = file_path.replace('.csv', '_done4.csv')

# Save the updated DataFrame to the new CSV file
df.to_csv(done_file_path, index=False)

# Output the path to the new CSV file
print(f"Updated CSV file saved as: {done_file_path}")


File not found: D:\Python2\Attachment\Report(6)\Report(6)\20240727_231535_160.heic
File not found: D:\Python2\Attachment\Report(6)\Report(6)\NEW_REKENING_KORAN_ONLINE_323401019590537_2024-07-28_2024-07-29_00343994.pdf_429.crdownload
File not found: D:\Python2\Attachment\Report(6)\Report(6)\SURYA PURNAMA TJENG_100724_485.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\ANDI HARTONO_wa150724_491.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\TEK SUYANTO_200724_508.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\RUDY YUWONO_visa150724_513.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\RUDY YUWONO_plt150724_514.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\DESSY ROSMARIA_100724_517.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\THERESIA_150724_532.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\RMOCHAMAD GANDHANI_150724_534.htm
File not found: D:\Python2\Attachment\Report(6)\Report(6)\GONA

In [27]:
import pandas as pd

path=r"D:\Python2\Attachment\attachments_log\log_report3\error081224104300248.csv"

df=pd.read_csv(path)
# df.to_csv(path, index=False)

df

Unnamed: 0,TITLE,DESCRIPTION,VERSIONDATA,PATHONCLIENT,FIRSTPUBLISHLOCATIONID,ERROR,ERROR.1,ERROR2
0,image-1206599770477098_296,,D:\Python2\Attachment\Report(2)\Report(2)\imag...,D:\Python2\Attachment\Report(2)\Report(2)\imag...,TTB000053927901,Error converting value to correct data type: C...,Error converting value to correct data type: C...,Error converting value to correct data type: C...


# To attach files to cases aftyer closing

In [1]:
import pandas as pd

# Load the two CSV files
file_1_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\extractfile_alex7_from_file.csv"
file_2_path = r"C:\Users\maste\Downloads\dataloader_v60.0.2\extract_content_doc_link.csv"

# Read the files into pandas dataframes
df_extract = pd.read_csv(file_1_path)
df_content_doc_link = pd.read_csv(file_2_path)

# Merging the two dataframes based on the 'CONTENTDOCUMENTID' column
df_merged = pd.merge(df_extract, df_content_doc_link[['CONTENTDOCUMENTID', 'ID', 'LINKEDENTITYID']], on='CONTENTDOCUMENTID', how='left')

# Save the merged dataframe to a new CSV file
output_file_path = r'C:\Users\maste\Downloads\dataloader_v60.0.2\extractfile_alex7_done.csv'
df_merged.to_csv(output_file_path, index=False)

print("File saved successfully at:", output_file_path)


File saved successfully at: C:\Users\maste\Downloads\dataloader_v60.0.2\extractfile_alex7_done.csv


# Combine all Report Files

In [1]:
import os
import pandas as pd

# Set the directory where your files are located
folder_path = r"D:\Python2\cekcek\Report csv"

# Initialize an empty list to hold dataframes
dataframes = []

# Loop through all CSV files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Read the CSV file
        df = pd.read_csv(os.path.join(folder_path, filename))
        
        # Add the 'source_file' column
        df['source_file'] = filename
        
        # Append the dataframe to the list
        dataframes.append(df)

# Concatenate all dataframes in the list into one dataframe
combined_df = pd.concat(dataframes, ignore_index=True)

# Save the combined dataframe to a new CSV file
output_file = 'D:\Python2\cekcek\Report csv\combined_report.csv'
combined_df.to_csv(output_file, index=False)

print(f"Combined file saved as {output_file}")


Combined file saved as D:\Python2\cekcek\Report csv\combined_report.csv
