### Get Scripts From ICPSR

In [1]:
import os
import getpass
import time
import requests
from zipfile import ZipFile
from bs4 import BeautifulSoup

In [2]:
def icpsr_download(file_id, email=None, password=None, reset=False, download_dir="icpsr_data", msg=True, unzip=True, delete_zip=None):
    # Detect login info
    if reset:
        email = password = None
    
    if email is None:
        email = os.getenv("icpsr_email")
        if not email:
            email = input("ICPSR requires your user account information. Please enter your email address:\n")
    
    if password is None:
        password = os.getenv("icpsr_password")
        if not password:
            password = getpass.getpass("Please enter your ICPSR password:\n")
    
    # Get list of current download directory contents
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)
    dd_old = os.listdir(download_dir)
    
    # Loop through files
    for item in file_id:
        # show process
        if msg:
            print(f"Downloading ICPSR file: {item} ({str(time.time())})")
        
        # build url
        url = f"https://www.openicpsr.org/openicpsr/project/195984/version/V1/download/project?dirPath=/openicpsr/195984/fcr:versions/V1"
        
        # Set up session
        session = requests.Session()
        
        # Login
        #login_data = {'email': email, 'password': password }
        #login_url = 'https://www.openicpsr.org/openicpsr/login'
        login_data = {'email': email, 'password': password, 'path': 'ICPSR', 'noautoguest': '',
                      'request_uri': 'https://www.icpsr.umich.edu/oauth/redir?callback=https%3A%2F%2Fwww.openicpsr.org%2Fopenicpsr%2Foauth%2Fcallback&client_key=dRhKh4XnVucqtxeM1pCBiAwioabpwp&path=ICPSR&duo=',
                      'Log In': 'Log In'}
        login_url = 'https://www.icpsr.umich.edu/rpxlogin'
        r = session.post(login_url, data=login_data)

        url2 = r.headers['Refresh'].split('URL=')[1]
        
        r = session.get(url2)
        print(r.status_code, r.headers)

        url2 = r.headers['Refresh'].split('URL=')[1]
        
        r = session.get(url2)
        
        # Download the file
        response = session.get(url)
        
        # Save the file
        file_name = f"ICPSR_{str(item).zfill(5)}.zip"
        file_path = os.path.join(download_dir, file_name)
        with open(file_path, 'wb') as f:
            f.write(response.content)
        
        # Wait for the download to complete (adjust the sleep time if needed)
        time.sleep(10)
        
        # Unzip if specified
        if unzip:
            with ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(download_dir)
        
        # Delete zip if specified
        if delete_zip:
            os.remove(file_path)

In [4]:
# Open the file for reading
with open("icpsr_creds.txt", "r") as file:
    # Read the lines of the file
    lines = file.readlines()

# Process each line
for line in lines:
    # Split the line into username and password using comma as the separator
    username, password = line.strip().split(',')

In [None]:
icpsr_download([195984], email=username, password = password)

Downloading ICPSR file: 195984 (1705795911.5230305)
200 {'Date': 'Sun, 21 Jan 2024 00:11:51 GMT', 'Server': 'Apache/2.2', 'Refresh': '0; URL=https://www.icpsr.umich.edu/oauth/redir?callback=https%3A%2F%2Fwww.openicpsr.org%2Fopenicpsr%2Foauth%2Fcallback&client_key=dRhKh4XnVucqtxeM1pCBiAwioabpwp&path=ICPSR&duo=', 'Set-Cookie': 'Ticket=hash&4001f95db40ef1dda4f97ec49141b5e6&time&1705795912&ip&185.250.36.73&user&156252&expires&1705828312; domain=.icpsr.umich.edu; path=/; HttpOnly', 'Access-Control-Allow-Methods': 'POST, GET, OPTIONS, DELETE, PUT', 'Access-Control-Max-Age': '1000', 'Access-Control-Allow-Headers': 'X-Requested-With, Content-Type, Origin, Authorization, Accept, Client-Security-Token, Accept-Encoding', 'X-UA-Compatible': 'IE=Edge', 'Content-Length': '0', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Content-Type': 'text/html; charset=ISO-8859-1'}
Yippy! Authentication successful!
https://www.icpsr.umich.edu/oauth/redir?callback=https%3A%2F%2Fwww.openicpsr.org