<hr>

***Version: 1001.1792024.qut.cs.tnl***

***Sk Tanzir Mehedi, PhD Student, QUT***

***Supervisory Team: Prof. Raja Jurdak & Dr Chadni Islam***
<hr>

**----Start of Step 8 Analysis----**

In [None]:
import os
import requests
import pandas as pd

# Paths
# Input workbook listing the packages to fetch, and the workbook that will
# receive the per-package outcome summary.
excel_file = "D:/Final Version/Step 7 FinalSelectedBenignPackages/finalSelectedBenignPackages.xlsx"
output_excel_file = "packageDownloadSummary.xlsx"

df = pd.read_excel(excel_file)

# Parallel lists: package_names[i] is paired with package_versions[i].
package_names = df['Benign Package Name'].tolist()
package_versions = df['Benign Package Version'].tolist()

# Destination directory for the downloaded archives. exist_ok=True creates it
# only when missing, without a separate existence check that could race with
# another process creating the same directory.
download_folder = 'downloaded_packages'
os.makedirs(download_folder, exist_ok=True)

# PyPI JSON API endpoint template: filled with (package name, version).
base_url = "https://pypi.org/pypi/{}/{}/json"

# Outcome buckets filled by download_package(); each entry is a dict with the
# keys 'Benign Package Name' and 'Benign Package Version'.
success_packages = []
skipped_packages = []
failed_packages = []

# Function to download and save the package file (.zip or .tar.gz)
def download_package(package_name, package_version):
    """Download the source distribution of one PyPI release.

    Queries the PyPI JSON API (``base_url``) for the given release, selects
    an ``sdist`` file (a ``.zip`` is preferred, ``.tar.gz`` is the fallback),
    and writes it into ``download_folder`` under the file name taken from the
    download URL. The outcome is recorded by appending one dict to exactly one
    of the module-level lists:

    * ``success_packages`` - the archive was downloaded and saved;
    * ``skipped_packages`` - the release has no ``.zip``/``.tar.gz`` sdist,
      or the metadata lacks the expected keys;
    * ``failed_packages`` - the metadata request or the archive download
      failed (non-success status, network error, timeout, invalid JSON).

    Args:
        package_name: Project name used in the PyPI URL.
        package_version: Release version used in the PyPI URL.

    Returns:
        None. Results are reported through the module-level lists and stdout.
    """
    record = {'Benign Package Name': package_name, 'Benign Package Version': package_version}

    # (connect, read) timeouts in seconds. Without a timeout a single stalled
    # connection would block the whole batch indefinitely.
    timeout = (10, 120)

    # Step 1: fetch the release metadata. Network errors and unparsable
    # bodies are treated like a non-200 answer instead of aborting the run.
    try:
        response = requests.get(base_url.format(package_name, package_version), timeout=timeout)
        data = response.json() if response.status_code == 200 else None
    except (requests.RequestException, ValueError):
        data = None

    if data is None:
        print(f"Failed to retrieve data for {package_name} version {package_version} from PyPI.")
        failed_packages.append(record)
        return

    # Step 2: pick the download URL. Only the metadata lookups live inside the
    # try so that a KeyError can only mean "unexpected metadata layout".
    try:
        # Try to find the URL for the .zip file first
        file_url = next(
            (file['url'] for file in data['urls'] if file['packagetype'] == 'sdist' and file['filename'].endswith('.zip')),
            None
        )

        # If .zip is not found, fall back to .tar.gz
        if not file_url:
            file_url = next(
                (file['url'] for file in data['urls'] if file['packagetype'] == 'sdist' and file['filename'].endswith('.tar.gz')),
                None
            )
    except KeyError:
        print(f"No download URL found for {package_name} version {package_version}")
        skipped_packages.append(record)
        return

    if not file_url:
        print(f"No source file (.zip or .tar.gz) found for {package_name} version {package_version}")
        skipped_packages.append(record)
        return

    # Step 3: download the archive. The status is checked before writing so an
    # HTTP error page is never saved to disk and counted as a package.
    filename = os.path.join(download_folder, file_url.split('/')[-1])

    print(f"Downloading {package_name} version {package_version} from {file_url}...")
    try:
        file_response = requests.get(file_url, timeout=timeout)
        file_response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to download {package_name} version {package_version} from {file_url}: {exc}")
        failed_packages.append(record)
        return

    with open(filename, 'wb') as f:
        f.write(file_response.content)
    print(f"Saved {package_name} version {package_version} as {filename}.")

    success_packages.append(record)

# Attempt every (name, version) pair taken from the input workbook; each call
# files its result into one of the three outcome lists.
for package, version in zip(package_names, package_versions):
    download_package(package, version)

# One DataFrame per outcome list, built in the order success / skipped / failed.
success_df, skipped_df, failed_df = (
    pd.DataFrame(records)
    for records in (success_packages, skipped_packages, failed_packages)
)

# Write the summary workbook: one sheet per outcome, without the index column.
sheets = (
    ('Success', success_df),
    ('Skipped', skipped_df),
    ('Failed', failed_df),
)
with pd.ExcelWriter(output_excel_file) as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Console recap of how many packages landed in each bucket.
print(
    f"Successfully downloaded: {len(success_packages)}",
    f"Skipped due to missing files: {len(skipped_packages)}",
    f"Failed to retrieve data: {len(failed_packages)}",
    sep="\n",
)

print("All downloads completed and summary saved to Excel.")

Downloading X11Client version 1.4 from https://files.pythonhosted.org/packages/0d/af/d68bb145dc5aabd56e026c10bc105829aca6ceac75a9ff6bce1bb241cdb7/X11Client-1.4.tar.gz...
Saved X11Client version 1.4 as downloaded_packages\X11Client-1.4.tar.gz.
Downloading cent version 5.0.0b1 from https://files.pythonhosted.org/packages/16/85/1633dfaa09894143c8f37001238155e1408549636ec19f3d88a35815ae18/cent-5.0.0b1.tar.gz...
Saved cent version 5.0.0b1 as downloaded_packages\cent-5.0.0b1.tar.gz.
Downloading 1337x version 1.2.6 from https://files.pythonhosted.org/packages/a8/6b/9237d0bd8bdd98c3e1206e5a149fd916b0d7a8216bf9c11f43ee402ee37e/1337x-1.2.6.tar.gz...
Saved 1337x version 1.2.6 as downloaded_packages\1337x-1.2.6.tar.gz.
Downloading L1test version 3.1.1 from https://files.pythonhosted.org/packages/a8/15/57016a69d98716f1d95ffd8e0b6fe88650842b47739744cbc3856eb770e0/L1test-3.1.1.tar.gz...
Saved L1test version 3.1.1 as downloaded_packages\L1test-3.1.1.tar.gz.
Downloading sinch version 1.0.0 from https:/

**----End of Step 8 Analysis----**