# Import Libraries

In [None]:
import requests
import json
import pandas as pd
import datetime

# Auth Credentials, Get Total Issue Count

In [None]:
# Auth credentials: a (username, API key) pair sent with every request.
# NOTE(review): these look like placeholders - keep real credentials out of
# the notebook/source control.
auth = ('username', 'api_key')

# Initial call to get the total number of issues. A single issue is requested
# from the start of the result set because only the 'total' field of the
# response is read below.
response = requests.get(
    'https://mysite.atlassian.net/rest/api/2/search?jql=project="JIRA"&startAt=0&maxResults=1',
    auth=auth,
    timeout=30,  # without a timeout a stalled connection would hang the notebook
)

# Fail with a clear HTTP error (bad credentials, bad JQL, ...) instead of an
# opaque KeyError on 'total' further down.
response.raise_for_status()

# Total number of matching issues; used as the upper bound for the pagination
# offset. (The name is kept for compatibility: it is a count of issues, not
# of pages.)
total_pages = response.json()['total']

# Get Jira Data

In [None]:
# Paginate to get all data, convert each page from JSON to a DataFrame
# (flattening the nested JSON columns), then combine the pages into issues_df.

# Number of issues requested per call.
page_size = 100

# Per-page frames are collected here and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame
# on every iteration.
page_frames = []

# Start off by defining the offset at 0
start_at = 0

# Keep requesting pages until the offset reaches the total issue count
while start_at < total_pages:

    # Request one page of issues starting at the current offset
    page_response = requests.get(
        f'https://mysite.atlassian.net/rest/api/2/search?jql=project="JIRA"&startAt={start_at}&maxResults={page_size}',
        auth=auth,
        timeout=30,
    )

    # Stop on HTTP errors rather than failing later on a missing 'issues' key
    page_response.raise_for_status()

    # Decode the JSON body and pull out the list of issues for this page
    page_issues = page_response.json()['issues']

    # An empty page means there is nothing left to read; without this guard
    # the offset would never advance and the loop would not terminate.
    if not page_issues:
        break

    # Flatten the nested JSON into columns
    issues = pd.json_normalize(page_issues)
    page_frames.append(issues)

    # Advance by the number of issues actually received, so that a page
    # shorter than page_size does not cause issues to be skipped.
    start_at += len(page_issues)

# Master DataFrame with one row per issue. ignore_index gives a clean 0..n-1
# index instead of labels repeating for every page.
if page_frames:
    issues_df = pd.concat(page_frames, ignore_index=True)
else:
    issues_df = pd.DataFrame()

# Data Cleansing

In [None]:
# Remove all sub-tasks: keep only the rows whose 'IsSubtask' value is False.
# Build the boolean row mask first, then apply it.
not_subtask = issues_df['IsSubtask'].eq(False)
issues_clean = issues_df[not_subtask]

In [None]:
# Select columns
# Names of the columns to keep. 'Description' is included because the
# newline-removal step further down reads and writes that column; without it
# that step raises KeyError.
column_list = [
    'Assignee', 'ResolutionDate', 'Status', 'StoryPointEstimate', 'StoryPointActual', 'Requestor', 'StartDate',
    'Components', 'IssueType', 'Summary', 'CreateDate', 'EpicLink', 'Priority', 'IssueKey', 'DueDate',
    'Description',
]

# Keep only the listed columns that actually exist in the frame (names that
# are absent are silently ignored by the intersection). The explicit copy
# makes issues_clean an independent frame, so the column assignments in the
# following steps do not write into a slice of issues_df.
issues_clean = issues_clean[issues_clean.columns.intersection(column_list)].copy()

In [None]:
# Convert to datetime
# Columns expected to hold dates in YYYY-MM-DD form
date_columns = ['StartDate', 'DueDate', 'CreateDate']

# Loop through each column in the list
for date in date_columns:

    # The column selection only keeps columns that exist, so any of these
    # may be absent; skip it rather than raise KeyError.
    if date not in issues_clean.columns:
        continue

    try:
        # Parse the whole column with the fixed format
        issues_clean[date] = pd.to_datetime(issues_clean[date], format='%Y-%m-%d')
    except (ValueError, TypeError):
        # Same outcome as the deprecated errors='ignore': if any value does
        # not match the format, leave the column exactly as it was.
        continue

In [None]:
# Remove newlines: replace every newline inside the 'Description' text with a
# single space.
# NOTE(review): this raises KeyError unless 'Description' is among the columns
# kept in issues_clean - confirm the column selection includes it.
description_text = issues_clean['Description']
issues_clean['Description'] = description_text.replace(to_replace=r'\n', value=' ', regex=True)