<a href="https://colab.research.google.com/github/pkaiser8/info-664-final/blob/main/PK_final_draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Three (or Fewer) Randomized Records from the Collection of the Cooper Hewitt, Smithsonian Design Museum

In [176]:
# Import the libraries this notebook relies on (pandas is third party; random is standard library).
import pandas as pd
import random

# Location of the collection CSV that the rest of the notebook reads.
data_filepath = '/content/objects-refined.csv'

def load_and_group_data(data_filepath, group_column):
    """
    Loads data from a CSV file and groups it by the specified key column(s).

    Arguments:
        data_filepath (string): The file or URL path to the CSV file.
        group_column (string or list of strings): The key column(s) to group the data by.

    Returns:
        The pandas GroupBy object produced by DataFrame.groupby(group_column).
    """
    # low_memory=False processes the entire file at once instead of in chunks.
    objects_df = pd.read_csv(data_filepath, low_memory=False)
    # Rows that share the same values in the key column(s) end up in the same group.
    return objects_df.groupby(group_column)

# Read only the header row so the available keys can be listed. The full DataFrame
# built inside load_and_group_data is local to that function and is not visible here.
available_keys = pd.read_csv(data_filepath, nrows=0).columns

# Allow the user to define the desired output by selecting from the key columns outlined in the .CSV file used.
print('Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. \nPlease see below a list of keys used to define objects in the collection .CSV file:\n')
print(available_keys) # Prints the .CSV columns so the user can decide which to use.
print()
print('Select which keys you would like to pair together to randomly find three or less records which share \ncommon values from these elements.\nNote: Some key combinations work better than others. See which ones yield the best results\n')

# User inputted information for each desired key.
# Surrounding whitespace is stripped so a stray space does not produce an unknown column name.
user_selected_key_1 = input('Please enter one of the keys listed above. Remember to include everything between the quotes:\n').strip()
print()
user_selected_key_2 = input('Please enter a second key:\n').strip()
print()
print(f'You have selected "{user_selected_key_1}" and "{user_selected_key_2}" as your grouped keys.')

# Built only after both answers have been read, so this cell runs top to bottom on a fresh kernel.
group_column = [user_selected_key_1, user_selected_key_2]

Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. 
Please see below a list of keys used to define objects in the collection .CSV file:

Index(['accession_number', 'creditline', 'date', 'decade', 'department_id',
       'description', 'dimensions', 'dimensions_raw', 'gallery_text', 'id',
       'inscribed', 'is_active', 'is_loan_object', 'justification',
       'label_text', 'markings', 'media_id', 'medium', 'on_display',
       'period_id', 'primary_image', 'primary_image2', 'provenance', 'signed',
       'title', 'title_raw', 'tms:id', 'tombstone', 'type', 'type_id', 'url',
       'videos', 'woe:country', 'woe:country_id', 'woe:country_name',
       'year_acquired', 'year_end', 'year_start'],
      dtype='object')

Select which keys you would like to pair together to randomly find three or less records which share 
common values from these elements.
Note: Some key combinations work better than others. See which ones yield the best results

Please e

In [182]:
num_records = 3 # Target sample size passed to select_random_group_and_records below; a group with fewer rows yields all of its rows instead.

def select_random_group_and_records(grouped_df, num_records=3):
    """
    Selects a random group and a specified number of random records from that group.

    Args:
        grouped_df: The grouped DataFrame (must expose .groups and .get_group()).
        num_records (int, optional): The number of records to select. Defaults to 3.

    Returns:
        tuple: (selected_group_key, random_records)
        selected_group_key: The key of the randomly chosen group.
        random_records (DataFrame): num_records randomly sampled rows of that group,
            or every row of the group when it holds fewer than num_records rows.
            An empty DataFrame is returned, and an error message printed, when the
            chosen key cannot be looked up.
    """
    group_keys = list(grouped_df.groups.keys()) # All unique group keys present in grouped_df.
    selected_group_key = random.choice(group_keys) # Randomly selects a group key to explore its values.

    try:
        selected_group_records = grouped_df.get_group(selected_group_key) # Extracts whole records belonging to the selected group
    except (KeyError, UnboundLocalError):
        # The exception types must be given as a tuple: `KeyError or UnboundLocalError`
        # evaluates to just KeyError, which leaves the second type uncaught.
        print(f"Could not find records since one group is blank, please run this cell again.\nYou may also reselect your two keys in the cell above and rerun both cells.\n")
        return selected_group_key, pd.DataFrame() # Empty DataFrame signals that no records were found

    if len(selected_group_records) >= num_records: # Checks if the group has enough records for the desired sample size.
        random_records = selected_group_records.sample(n=num_records) # Randomly selects exactly num_records rows from the group.
    else:
        random_records = selected_group_records # Fewer rows than requested: return all of them, in their original order.

    return selected_group_key, random_records

# Run the pipeline: build the groups, then draw one random group and a sample of its records.
grouped_df = load_and_group_data(data_filepath, group_column)
selected_group_key, random_records = select_random_group_and_records(grouped_df, num_records)

print("Selected Group Key:", selected_group_key)
if not random_records.empty:
    # One print call emits the heading, a blank line, and the DataFrame text, each on its own line.
    print("Randomly Selected Records:", "", random_records, sep="\n")

Selected Group Key: ('1984', 'Japan')
Randomly Selected Records:

      accession_number                creditline  date  decade  department_id  \
79698        1988-52-6                       NaN  1984  1980.0       35347493   
76003        1990-63-1  Gift of NUNO Corporation  1984  1980.0       35347501   
76440       1991-166-2   Gift of Yoshiko Ebihara  1984  1980.0       35347493   

                                             description  \
79698  Plaza in front of building with a large rounde...   
76003  Length of woven fabric with various textures a...   
76440                                 Dup. of 1991-166-1   

                                      dimensions  dimensions_raw gallery_text  \
79698  H x W: 46 x 71.5 cm (18 1/8 x 28 1/8 in.)             NaN          NaN   
76003  H x W: 345.4 x 87.6 cm (136 x 34 1/2 in.)             NaN          NaN   
76440                                        NaN             NaN          NaN   

             id  ...     type     type_id  

In [183]:
# Create a dictionary entry for each individual record and compile them into a list.

# Display label -> CSV column that supplies it. Looking fields up by column name
# (instead of by position in the row) keeps the mapping correct even if the column
# order of the CSV changes. Insertion order here is the column order of the table
# rendered later.
display_fields = {
    'Image': 'primary_image',
    'Title': 'title',
    'Date': 'date',
    'Medium': 'medium',
    'Dimensions': 'dimensions',
    'Type': 'type',
    'Country': 'woe:country_name',
    'Accession Number': 'accession_number',
}

# One dictionary per selected record (1-3 of them); an empty random_records yields an empty list.
all_records_data = [
    {label: row[source_column] for label, source_column in display_fields.items()}
    for _, row in random_records.iterrows()
]

In [184]:
# Render all_records_data (the 1-3 record dictionaries built above) as a styled HTML table.
import html  # Standard library; escapes CSV text before it is embedded in markup.

# Source: https://ipython.readthedocs.io/en/8.26.0/api/generated/IPython.display.html
from IPython.display import HTML, display

# Inline CSS shared by every header cell and every body cell.
header_style = "border: 2px dotted #fff; padding: 8px;"
cell_style = "border: 1px solid #ddd; padding: 8px;"

# Cooper Hewitt logo, shown when a record has no image of its own.
placeholder_image = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg/320px-Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg.png'

column_labels = ['Image', 'Title', 'Date', 'Medium', 'Dimensions', 'Type', 'Country', 'Accession Number']


def _cell_text(value):
    """Return HTML-safe text for a table cell; missing values become an empty cell instead of 'nan'."""
    return "" if pd.isna(value) else html.escape(str(value))


header_cells = "".join(f"<th style='{header_style}'>{label}</th>" for label in column_labels)
table_rows = [f"<tr>{header_cells}</tr>"]

for record in all_records_data:
    # Fall back to the placeholder when the 'Image' value is absent or missing (NaN/None).
    image_value = record.get('Image')
    image_link = placeholder_image if pd.isna(image_value) else str(image_value)
    # quote=True also escapes quote characters, so the URL cannot break out of the attribute.
    image_url = html.escape(image_link, quote=True)

    # Wrap the image in an <a> tag so the thumbnail links to the full size picture in a separate tab.
    row_cells = [f"<td style='{cell_style}'><a href='{image_url}' target='_blank'><img src='{image_url}' width='100'></a></td>"]
    # 'Image' is skipped below because it was already rendered as the thumbnail cell.
    row_cells.extend(
        f"<td style='{cell_style}'>{_cell_text(value)}</td>"
        for key, value in record.items()
        if key != 'Image'
    )
    table_rows.append("<tr>" + "".join(row_cells) + "</tr>")

html_table = (
    "<table style='border-collapse: separate; border-spacing: 10px; border: 2px solid #ddd;'>"
    + "".join(table_rows)
    + "</table>"
)

# Print the shared values taken from selected_group_key, then show the table.
print("Cooper Hewitt collection objects selected from the database with the following shared elements:")
print(f'{group_column[0]} = {selected_group_key[0]}')
print(f'{group_column[1]} = {selected_group_key[1]}')
print()

display(HTML(html_table))

Cooper Hewitt collection objects selected from the database with the following shared elements:
date = 1984
woe:country_name = Japan



Image,Title,Date,Medium,Dimensions,Type,Country,Accession Number
,"Drawing, Synaps Project, Tokyo: Plaza, 1984",1984,Graphite on white paper,H x W: 46 x 71.5 cm (18 1/8 x 28 1/8 in.),Drawing,Japan,1988-52-6
,"Textile, Stone Wall, 1984",1984,"Wool, cotton",H x W: 345.4 x 87.6 cm (136 x 34 1/2 in.),Textile,Japan,1990-63-1
,"Poster, ""From Pushpin to Architecture"", 1984",1984,Offset lithograph on paper,,Poster,Japan,1991-166-2
