In [3]:
from pathlib import Path
from tqdm.notebook import tqdm
import requests
from datetime import datetime
import gzip
import json
import time
import random
import pandas as pd
import numpy as np

datetime_format = "%Y-%m-%dT%H:%M:%S"
cutoff_date = datetime(2023, 4, 1)

In [4]:
# with open("pypi-packages2023-04-24.json", "r") as file:
#     # Load the data from the file into a dictionary
#     ppackages = json.load(file)
# df = (
#     pd.DataFrame(ppackages, columns=["pkg"])
#     .assign(new=np.nan)
# )
# df.to_csv("log.csv", index=False)
# df.head()

In [None]:
df = pd.read_csv("log.csv")
results = []
for ix, row in tqdm(df.iterrows()):
    pkg = row["pkg"]
    new = row["new"]
    if new == 1:
        results.append(pkg)
        print(f"{pkg} added to list")
    elif new == 0:
        continue
        
    # Check if package metadata has been cached before
    cache_filepath = Path(f"../data/cache/{pkg}.json.gz")
    if cache_filepath.is_file():
        with gzip.open(cache_filepath, "r") as f:
            metadata = json.loads(f.read())
    else:  # Query
        response = requests.get(f"https://pypi.org/pypi/{pkg}/json")
        return_code = response.status_code
        if return_code == 200:
            compressed_payload = gzip.compress(response.text.encode("utf-8"))
            with open(cache_filepath, "wb") as f:
                f.write(compressed_payload)
            metadata = response.json()
            time.sleep(random.random())
        else:
            print(f"Return code != 200 - {return_code, pkg}")
            
        
    releases_data = metadata["releases"]
    # Get first release date
    try:
        release_info = next(iter(releases_data.values()))[0]
    except IndexError:  # No info on release
        df.at[ix, "new"] = 0
        continue
    
    # Check date of earliest release
    release_datetime_str = release_info["upload_time"]
    release_datetime = datetime.strptime(release_datetime_str, datetime_format)
        
    if release_datetime > cutoff_date:
        results.append(pkg)
        df.at[ix, "new"] = 1
        print(f"{pkg} added to list")
    else:
        df.at[ix, "new"] = 0
        
    n_candidates = len(results)
    if (n_candidates % 100) == 0:
        print("=============================================")
        print(f"Current number of candidates: {n_candidates}")
        print("=============================================")
df.to_csv("log.csv", index=False)

0it [00:00, ?it/s]

0411-test added to list
0411-test added to list
0staff added to list
0staff added to list
2d added to list
2d added to list
3d added to list
3d added to list
3d-bin-container-packing added to list
Return code != 200 - (404, '3d-bin-container-packing')
3d-bin-container-packing added to list
41datastructure added to list
41datastructure added to list
5345345345345345 added to list
5345345345345345 added to list
a2d added to list
a2d added to list
a3dj added to list
a3dj added to list
aa-charlink added to list
aa-charlink added to list
aadetools added to list
aadetools added to list
aadhar-ocr added to list
aadhar-ocr added to list
aafitrans added to list
aafitrans added to list
aapp2face added to list
aapp2face added to list
aaz-dev added to list
aaz-dev added to list
abbccd added to list
abbccd added to list
abcd-seth added to list
abcd-seth added to list
abch-tree-sitter added to list
abch-tree-sitter added to list
abch-tree-sitter-solidity added to list
abch-tree-sitter-solidity added

aitester added to list
ai-traffic-light-simulator added to list
ai-traffic-light-simulator added to list
ai-tts added to list
ai-tts added to list
aiva-core added to list
aiva-core added to list
aixplain added to list
aixplain added to list
ajallaan added to list
ajallaan added to list
ak-cache added to list
ak-cache added to list
akcalculator added to list
Return code != 200 - (404, 'akcalculator')
akcalculator added to list
akhaleel338package added to list
akhaleel338package added to list
ak-keydetector added to list
ak-keydetector added to list
al2var added to list
al2var added to list
alab added to list
alab added to list
alamos added to list
alamos added to list
Current number of candidates: 300
alapchari added to list
alapchari added to list
albert-toolkit added to list
albert-toolkit added to list
albprov added to list
albprov added to list
albprov-test added to list
albprov-test added to list
alchemiscale added to list
alchemiscale added to list
alethiometer added to list
aleth

Return code != 200 - (404, 'aprsd-nearest-plugin')
aprsd-nearest-plugin added to list
apvn added to list
apvn added to list
apyproxy added to list
apyproxy added to list
aqainterpreter added to list
aqainterpreter added to list
arabic2latin added to list
arabic2latin added to list
arb-launcher added to list
arb-launcher added to list
arb-watchdog added to list
arb-watchdog added to list
arcade-accelerate added to list
arcade-accelerate added to list
archytas added to list
archytas added to list
arcipelago added to list
arcipelago added to list
arfindata added to list
arfindata added to list
arfm added to list
Return code != 200 - (404, 'arfm')
arfm added to list
arfpy added to list
arfpy added to list
arglu added to list
arglu added to list
argshell added to list
argshell added to list
arinc424 added to list
arinc424 added to list
arincalc added to list
Return code != 200 - (404, 'arincalc')
arincalc added to list
arisu added to list
arisu added to list
arithmetic-vr added to list
arit

babichjacob-coordinator added to list
babichjacob-coordinator added to list
babs added to list
babs added to list
backend-sqlalchemy added to list
backend-sqlalchemy added to list
backseat-driver added to list
backseat-driver added to list
backtrader-binance added to list
backtrader-binance added to list
backtrader-contrib-lucidinvestor added to list
backtrader-contrib-lucidinvestor added to list
backup-collector added to list
backup-collector added to list
bacteria added to list
bacteria added to list
badgie added to list
badgie added to list
baffled-v2 added to list
baffled-v2 added to list
bafunc added to list
Return code != 200 - (404, 'bafunc')
bafunc added to list
bagua-cuda117 added to list
bagua-cuda117 added to list
bahacalculator added to list
bahacalculator added to list
baidufanyiformind added to list
baidufanyiformind added to list
baiduspidercreatedbyhanxu added to list
baiduspidercreatedbyhanxu added to list
balance-nubank added to list
balance-nubank added to list
bamsa

bnummet added to list
bnv added to list
bnv added to list
boa-fm added to list
boa-fm added to list
boa-framework added to list
boa-framework added to list
boai added to list
boai added to list
bobothy-colors added to list
bobothy-colors added to list
bobspdf added to list
bobspdf added to list
bogrod added to list
bogrod added to list
boilercv added to list
boilercv added to list
boilerdata added to list
boilerdata added to list
bolift added to list
bolift added to list
bolt-sdk-py2 added to list
bolt-sdk-py2 added to list
boneflet added to list
boneflet added to list
book-name-generator added to list
book-name-generator added to list
boolean-expression added to list
boolean-expression added to list
bootstrapped-ng added to list
bootstrapped-ng added to list
botcity-ms365-excel-plugin added to list
botcity-ms365-excel-plugin added to list
botheads added to list
botheads added to list
bothpy-ctp added to list
bothpy-ctp added to list
botik added to list
botik added to list
botik-telebo

cdk-cloudformation-poc-azure-blobstorage added to list
cdk-validator-checkov added to list
cdk-validator-checkov added to list
cdmpy added to list
cdmpy added to list
ceaser added to list
ceaser added to list
ceedeetest added to list
ceedeetest added to list
celery-sqlalchemy-kit added to list
celery-sqlalchemy-kit added to list
cellseg-sribd added to list
cellseg-sribd added to list
cellsnake added to list
cellsnake added to list
cellxgene-census added to list
cellxgene-census added to list
cengal added to list
cengal added to list
censoredsummarystats added to list
censoredsummarystats added to list
certbot-dns-e2econfig added to list
certbot-dns-e2econfig added to list
certbot-onion added to list
certbot-onion added to list
certbot-plugin-ionos added to list
certbot-plugin-ionos added to list
certora-cli-alpha-cozy added to list
certora-cli-alpha-cozy added to list
certora-cli-alpha-oz-package-eqchecker added to list
certora-cli-alpha-oz-package-eqchecker added to list
certora-cli-a

Return code != 200 - (404, 'climan')
climan added to list
climix added to list
climix added to list
clip-bbox added to list
clip-bbox added to list
clipped added to list
clipped added to list
cli-sandbox added to list
cli-sandbox added to list
clivo-fnn added to list
clivo-fnn added to list
clivo-gbt added to list
clivo-gbt added to list
cllm-data-curation added to list
cllm-data-curation added to list
clode added to list
clode added to list
cloud-handler added to list
cloud-handler added to list
cloudmonitor added to list
cloudmonitor added to list
cloudproof-fpe added to list
cloudproof-fpe added to list
cloudquicklabs1 added to list
cloudquicklabs1 added to list
clrxdisasm-wrapper added to list
clrxdisasm-wrapper added to list
clsprop added to list
clsprop added to list
cl-testlog added to list
cl-testlog added to list
cluster-colors added to list
cluster-colors added to list
clusterfiles added to list
clusterfiles added to list
clusterfun added to list
clusterfun added to list
cmap

Return code != 200 - (404, 'connect-database')
connect-database added to list
consistency-models added to list
consistency-models added to list
consistent added to list
consistent added to list
constgen added to list
constgen added to list
contact-energy-nz added to list
contact-energy-nz added to list
continue-sestinj added to list
continue-sestinj added to list
convalidatorx added to list
convalidatorx added to list
converito added to list
converito added to list
cook-build added to list
cook-build added to list
cookiecutter-autodocs added to list
cookiecutter-autodocs added to list
cookiecutter-openedx-plugin added to list
cookiecutter-openedx-plugin added to list
cooldfa added to list
cooldfa added to list
coolhtml added to list
coolhtml added to list
cool-seq-tool added to list
cool-seq-tool added to list
cooodecooo added to list
cooodecooo added to list
coookieai added to list
Return code != 200 - (404, 'coookieai')
coookieai added to list
coordinate-descent-attention added to li

Return code != 200 - (404, 'data-automate-python-souravdlboy')
data-automate-python-souravdlboy added to list
database-seeder added to list
database-seeder added to list
database-testing-tools added to list
database-testing-tools added to list
databasez added to list
databasez added to list
databricks-dolly added to list
databricks-dolly added to list
data-cdvst added to list
data-cdvst added to list
dataclasses-sqlitedict added to list
dataclasses-sqlitedict added to list
data-connection added to list
data-connection added to list
datacula added to list
datacula added to list
datadiligence added to list
datadiligence added to list
data-diode added to list
data-diode added to list
dataframeformatter added to list
dataframeformatter added to list
dataframeformattersc added to list
dataframeformattersc added to list
dataframeformatting added to list
dataframeformatting added to list
dataframestyler added to list
dataframestyler added to list
datafstyler added to list
datafstyler added to

dictgen added to list
didicli added to list
didicli added to list
didigo added to list
didigo added to list
diengine-connect added to list
diengine-connect added to list
digicalculator added to list
digicalculator added to list
digidownload added to list
digidownload added to list
digimat-bac0 added to list
digimat-bac0 added to list
digitalai-release-sdk added to list
digitalai-release-sdk added to list
digitalarztools added to list
digitalarztools added to list
digital-unit added to list
digital-unit added to list
digitize added to list
digitize added to list
digocli added to list
digocli added to list
dimensionality-reductions-jmsv added to list
dimensionality-reductions-jmsv added to list
dingraia added to list
dingraia added to list
dino2 added to list
dino2 added to list
dinov2 added to list
dinov2 added to list
directreport added to list
directreport added to list
dirhelp added to list
dirhelp added to list
dirmarks added to list
dirmarks added to list
disallow-import-star added

docs-chat-bot added to list
docx-word-instance-generator added to list
docx-word-instance-generator added to list
dodoria721 added to list
dodoria721 added to list
doe-dap-dl added to list
doe-dap-dl added to list
doit4u added to list
doit4u added to list
doji-core added to list
doji-core added to list
dojo-truant added to list
dojo-truant added to list
dokusan added to list
dokusan added to list
dolby added to list
dolby added to list
dollar-templates added to list
dollar-templates added to list
domino-py added to list
domino-py added to list
donutlib added to list
donutlib added to list
Current number of candidates: 2900
dop-integration-libs added to list
dop-integration-libs added to list
doro0001 added to list
doro0001 added to list
dorsa-datetime added to list
dorsa-datetime added to list
dorsa-logging added to list
dorsa-logging added to list
doru added to list
doru added to list
dot-to-object added to list
dot-to-object added to list
doublebellycluster added to list
doublebellyc

embedin added to list
emb-opt added to list
emb-opt added to list
emily-editor added to list
emily-editor added to list
emirp added to list
emirp added to list
emmett-prometheus added to list
emmett-prometheus added to list
emojichat added to list
emojichat added to list
Current number of candidates: 3200
emojidb-python added to list
emojidb-python added to list
enabledisablelua added to list
enabledisablelua added to list
encpp added to list
encpp added to list
endurance-etl added to list
endurance-etl added to list
enexis-friends-quiz added to list
enexis-friends-quiz added to list
eniris added to list
eniris added to list
enprog added to list
enprog added to list
ensemblrestpy added to list
ensemblrestpy added to list
ensf added to list
ensf added to list
ensf-338-final-project added to list
ensf-338-final-project added to list
ensf-338-final-project-package added to list
ensf-338-final-project-package added to list
ensf338grp26prj added to list
ensf338grp26prj added to list
entelec

fast-depends added to list
fastencode added to list
fastencode added to list
fasthr added to list
fasthr added to list
fastllama added to list
fastllama added to list
fastllama-python added to list
Return code != 200 - (404, 'fastllama-python')
fastllama-python added to list
Current number of candidates: 3500
fastllama-python-test added to list
fastllama-python-test added to list
fastlorachat added to list
fastlorachat added to list
fastmitoassembler added to list
fastmitoassembler added to list
fastpathplanning added to list
fastpathplanning added to list
fast-pq added to list
fast-pq added to list
fastq-handler added to list
fastq-handler added to list
fastql added to list
Return code != 200 - (404, 'fastql')
fastql added to list
fastqueue-lib added to list
fastqueue-lib added to list
fastsim added to list
fastsim added to list
fast-weather added to list
fast-weather added to list
fattools added to list
fattools added to list
fav-plots added to list
fav-plots added to list
fazy added

ftrixminer added to list
fuckvkeypad added to list
fuckvkeypad added to list
fulcrum-airflow added to list
fulcrum-airflow added to list
funciones-generales added to list
funciones-generales added to list
func-kit added to list
func-kit added to list
funclassify added to list
Return code != 200 - (404, 'funclassify')
funclassify added to list
funcpck added to list
funcpck added to list
function-tool added to list
function-tool added to list
funnylib added to list
funnylib added to list
furiosa-optimizer added to list
furiosa-optimizer added to list
furiosa-quantizer-impl added to list
furiosa-quantizer-impl added to list
fuse-client added to list
fuse-client added to list
fused added to list
fused added to list
fwdviewpy added to list
fwdviewpy added to list
fw-gear-cow-says added to list
fw-gear-cow-says added to list
fw-http-parser added to list
fw-http-parser added to list
fyp-clustering added to list
fyp-clustering added to list
fyp-crawler added to list
fyp-crawler added to list
g

Return code != 200 - (404, 'gnosai')
gnosis-neon-safe-eth-py added to list
Return code != 200 - (404, 'gns3wrapper')
Return code != 200 - (404, 'gnsspy')
Return code != 200 - (404, 'goanna')
goastpy added to list
Return code != 200 - (404, 'goat-qcontrol')
Return code != 200 - (404, 'goat-qcontrol-approx')
Return code != 200 - (404, 'gobuubs-ml-lib')
goby-sdk added to list
Return code != 200 - (404, 'gocms-api')
Return code != 200 - (404, 'godaddy-reseller')
Return code != 200 - (404, 'goddess')
Return code != 200 - (404, 'godfarmer')
Return code != 200 - (404, 'goding-last-lambda')
goeun added to list
Return code != 200 - (404, 'goev')
goev added to list
gofigr added to list
goku-k256 added to list
gokyuzu added to list
Return code != 200 - (404, 'goldenfast')
gold-miner added to list
gold-miner-ui added to list
Return code != 200 - (404, 'goldpot-ssenge')
golident added to list
Return code != 200 - (404, 'gompers')
Return code != 200 - (404, 'gong')
Return code != 200 - (404, 'gongbo

In [15]:
df.to_csv("log.csv", index=False)

In [16]:
df.query("new==1")

Unnamed: 0,pkg,new
16,0411-test,1.0
30,0staff,1.0
248,2d,1.0
297,3d,1.0
298,3d-bin-container-packing,1.0
...,...,...
132774,firemon-api,1.0
132790,firepup650,1.0
133110,fisinma,1.0
133173,fitk,1.0


In [5]:
df.query("new==new")

Unnamed: 0,pkg,new
0,0,0.0
1,0-0,0.0
2,000,0.0
3,00000a,0.0
4,0-0-1,0.0
...,...,...
3587,adr-ca,0.0
3588,adre,0.0
3589,adrenaline,0.0
3590,adresseparser,0.0


In [6]:
print(len(results))
results

85


['0411-test',
 '0411-test',
 '0staff',
 '0staff',
 '2d',
 '2d',
 '3d',
 '3d',
 '3d-bin-container-packing',
 '3d-bin-container-packing',
 '41datastructure',
 '41datastructure',
 '5345345345345345',
 '5345345345345345',
 'a2d',
 'a2d',
 'a3dj',
 'a3dj',
 'aa-charlink',
 'aa-charlink',
 'aadetools',
 'aadetools',
 'aadhar-ocr',
 'aadhar-ocr',
 'aafitrans',
 'aafitrans',
 'aapp2face',
 'aapp2face',
 'aaz-dev',
 'aaz-dev',
 'abbccd',
 'abbccd',
 'abcd-seth',
 'abcd-seth',
 'abch-tree-sitter',
 'abch-tree-sitter',
 'abch-tree-sitter-solidity',
 'abch-tree-sitter-solidity',
 'abcparse',
 'abcparse',
 'abmetaanalysis',
 'abmetaanalysis',
 'abrilskopsorting',
 'abrilskopsorting',
 'absl-extra',
 'absl-extra',
 'abstract-singleton',
 'academicdb',
 'acadia',
 'acb',
 'accentcolordetect',
 'accurating',
 'accure-line-seg',
 'accure-ocr-lineseg',
 'accure-ocr-seg',
 'achat',
 'acie',
 'aciembler',
 'aclib-builtins',
 'aclib-pip',
 'acme31',
 'acml',
 'acoustic-analyser',
 'acr122u-websocket',
 'ac

In [11]:
with open(f"new-packages-apri2023.json", 'w') as fh:
    json.dump(results, fh) 

In [12]:
with open(f"new-packages-apri2023.json", 'r') as fh:
    new_packages = json.load(fh) 
new_packages[:5]

['0411-test', '0411-test', '0staff', '0staff', '2d']