In [13]:
import requests
import json
import plotly.express as px
import pandas as pd
import datetime
from numpy import ones,vstack
from numpy.linalg import lstsq

In [14]:
client_id = '07bd2676-f950-48c0-8b12-ebd5e8b1491d'
client_secret = 'b874aa02-4333-4a4a-be10-55b44fc94eb3'
owner = "gitfeedV8"
thing = "github"
nodes = ['pulls', 'issues', 'commits']
# nodes = ['pulls', 'issues']
start_date = '2020-09-01T00:00:00-03:00'
end_date = '2021-09-15T23:59:59-03:00'

date_format = '%Y-%m-%dT%H:%M:%S%z'

In [15]:
def auth():
    _auth_url = 'http://agrows-keycloak.labbs.com.br/auth/realms/agroWS/protocol/openid-connect/token'
    _auth_data = {
        'client_id': client_id,
        'client_secret': client_secret,
        'grant_type': 'client_credentials'
    }
    _auth_headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    _auth_request = requests.post(_auth_url, data=_auth_data, headers=_auth_headers)
    return json.loads(_auth_request.text)['access_token']

In [16]:
def load_data(_owner, _thing, _node, _token, _start_date, _end_date):
    _data_api_url = f'https://agrows-data-api.labbs.com.br/v1/owner/{_owner}/thing/{_thing}/node/{_node}'
    _data_api_params = {
        'startDateTime': _start_date,
        'endDateTime': _end_date
    }
    _data_api_headers = {
        'content-type': 'application/json',
        'Authorization' : f'Bearer {_token}'
    }
    _data_api_request = requests.get(_data_api_url, params=_data_api_params, headers=_data_api_headers)
    _data = json.loads(_data_api_request.text)['data']
    # data_source = json.dumps(data, indent=4, sort_keys=True)
    return _data

In [17]:
def load_mongo_data(_repo_list):
    _data_api_url = f'http://localhost:3005/mongo/data'
    _data_api_params = {
        'repoList': _repo_list
    }
    _data_api_headers = {
        'content-type': 'application/json'
    }
    _data_api_request = requests.get(_data_api_url, json=_data_api_params, headers=_data_api_headers)
    _data = json.loads(_data_api_request.text)
    # _data_source = json.dumps(_data, indent=4, sort_keys=True)
    if 'data' in _data:
        return _data['data']
    return _data

In [18]:
def load_raw_data(url):
    _data_api_headers = {
        'content-type': 'application/json'
    }
    _data_api_request = requests.get(url, headers=_data_api_headers)
    _data = json.loads(_data_api_request.text)
    return _data

In [19]:
metrics = {
    "commitFrequency": [[366,0], [168,40], [48,60], [6,100]],
    "issuesRecent": [[720,0],[360,20],[160,50],[48,80],[24,100]]
}

def format_date(_date_string):
    _date_obj = datetime.datetime.strptime(_date_string, date_format)
    return _date_obj

def get_metric_percent(_metric, _value):
    _btw_index = -1 # Last array position
    for i, v in enumerate(metrics[_metric]):
        if _value >= v[0]:
            _btw_index = i
            break
    if _btw_index == 0:
        return metrics[_metric][0][1] # returning minimal score
    elif _btw_index == -1:
        return metrics[_metric][-1][1] # returning maximal score

    _point_0 = (metrics[_metric][i-1][0], metrics[_metric][i-1][1])
    _point_1 = (metrics[_metric][i][0], metrics[_metric][i][1])
    _points = [_point_0,_point_1]
    _x_coords, _y_coords = zip(*_points)
    _A = vstack([_x_coords,ones(len(_x_coords))]).T
    _x, _c = lstsq(_A, _y_coords)[0]

    _score = _x * _value + _c

    return _score

def calc_frequency(_list):
#     sort by date
#     print(_list)
    if len(_list) == 0:
        return 0
    _frequency = 0
    for index, value in enumerate(_list):
        if index == 0:
            _date_a = datetime.datetime.now().replace(tzinfo=datetime.timezone.utc).astimezone(tz=None)
        else:
            _date_a = format_date(_list[index-1][0])
        _date_b = format_date(_list[index][0])
        _delta = _date_a - _date_b
        if _delta.total_seconds() < 0:
            _delta = _date_b - _date_a
        # print("Delta: ",_date_a, _date_b, _delta)
        _frequency = _frequency + _delta.total_seconds()
    _frequency = (_frequency/len(_list)) / 60 / 60
    return _frequency

def calc_score(_list, _metric):
    _frequency = calc_frequency(_list)
    _score = get_metric_percent(_metric, _frequency)
    return _score


def calc_popularity(_qty):
    if _qty < 20:
        return 10
    elif _qty in range(20, 49):
        return 30
    elif _qty in range(50, 99):
        return 60
    elif _qty >= 100:
        return 90

In [20]:
access_token = auth()

data = {}
for node in nodes:
    raw_data = load_data(owner, thing, node, access_token, start_date, end_date)
    raw_data = [{'dateTime': x['dateTime'], **x.get('attributes')} for x in raw_data]
    for r in raw_data:
        if 'dono' not in r:
            del r
            continue
        r['owner'] = r.pop('dono')
        for k in ['owner', 'name', 'participants', 'comments', 'author', 'labels', 'message']:
            if k in r and ':' in r[k]:
                r[k] = r[k].split(":")[1]
            if k in r and r[k] == 'no-string':
                r[k] = ''
    # Keeping last version of itens
    if node in 'commits':
        data[node] = pd.DataFrame.from_dict(raw_data)
    else:
        data[node] = pd.DataFrame.from_dict(raw_data).sort_values('dateTime').groupby('number').tail(1)

In [21]:
# Getting owner and name from all nodes to request data from Mongo
frames = []
for k in data:
    frames.append(data[k][['owner', 'name']])

result = pd.concat(frames).drop_duplicates().to_json(orient="table", index=None)
repoList = json.loads(result)['data']

code_data = load_mongo_data(repoList)
data['code'] = pd.DataFrame.from_dict(code_data)

In [22]:
# We have data[node] with a clear data frame

for _, repo in data['code'].iterrows():
    # if repo['name'] != 'xls':
    #     continue
    # print(repo['owner'] + '/' + repo['name'])
    repoIssues = data['issues'].loc[(data['issues'].owner == repo['owner']) &  (data['issues'].name == repo['name'])]
    # print(repoIssues)
    issuesQty = repoIssues.count()['dateTime']
    # print("issuesQty:", issuesQty)
    participantsTotalCount = repoIssues['participantsTotalCount'].astype(int).sum()
    # print("participantsTotalCount:", participantsTotalCount)
    repoPulls = data['pulls'].loc[(data['pulls'].owner == repo['owner']) &  (data['pulls'].name == repo['name'])]
    # print(repoPulls)
    openPullsQty = repoPulls.loc[repoPulls['state'] == 'OPEN'].count()['dateTime']
    # print("openPullsQty:", openPullsQty)
    popularity = calc_popularity(issuesQty)
    popularity += calc_popularity(participantsTotalCount)
    popularity += calc_popularity(openPullsQty)
    popularity = popularity/3
    # print("Popularity:", popularity)
    # print('##########################')

    repoCommits = data['commits'].loc[(data['commits'].owner == repo['owner']) &  (data['commits'].name == repo['name'])]
    # print("Commits:", repoCommits[['committedDate']].values)
    frequency = calc_score(repoCommits[['committedDate']].values,'commitFrequency' )
    frequency += calc_score(repoIssues[['createdAt']].values,'issuesRecent' )
    frequency += calc_score(repoIssues.loc[repoIssues['state'] == 'CLOSED'][['closedAt']].values,'issuesRecent' )
    frequency = frequency / 3
    # print("Frequency:", frequency)
    # print('##########################')


    definitionOSS = 0
    if repo['licenseInfo'] != "":
        definitionOSS = 100

    df = pd.DataFrame(dict(
        r=[frequency, definitionOSS, 5, popularity, 5],
        theta=['Frequency','Definition of OSS','Friendly',
               'Popularity', 'Quality']))
    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
    fig.update_layout(
        title = repo['owner'] + '/' + repo['name'],
        polar=dict(
            radialaxis=dict(
            visible=True,
              range=[0, 100]
            )),
        showlegend=False
    )
    fig.show()

# df = pd.DataFrame(dict(
#     r=[1, 5, 5, 2, 3],
#     theta=['Frequency','Definition of OSS','Friendly',
#            'Popularity', 'Quality']))
# fig = px.line_polar(df, r='r', theta='theta', line_close=True)
# fig.show()


`rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.

